def nextafter(x, direction, dtype, itemsize):
    """Return the next representable neighbor of x in the appropriate direction.

    Parameters
    ----------
    x : bool, int or float (any value when ``dtype.kind == "S"``)
        Value whose neighbor is requested.
    direction : {-1, 0, +1}
        ``-1`` asks for the neighbor below `x`, ``+1`` for the one above
        and ``0`` returns `x` unchanged.
    dtype : numpy.dtype
        Only ``dtype.kind`` is inspected to choose the stepping rule.
    itemsize : int
        Forwarded untouched to the string, boolean and integer helpers.

    Returns
    -------
    The neighbor of `x`, or `x` itself when ``direction == 0``.

    Raises
    ------
    TypeError
        If ``dtype.kind`` is none of ``S``, ``b``, ``i``, ``u`` or ``f``.
    """
    assert direction in [-1, 0, +1]
    assert dtype.kind == "S" or type(x) in (bool, int, float)

    if direction == 0:
        return x

    kind = dtype.kind
    if kind == "S":
        return string_next_after(x, direction, itemsize)
    if kind == "b":
        return bool_type_next_after(x, direction, itemsize)
    if kind in ("i", "u"):
        return int_type_next_after(x, direction, itemsize)
    if kind == "f":
        # Aim at +/-infinity instead of ``x +/- 1``: for |x| >= 2**53 the
        # expression ``x +/- 1`` rounds back to ``x`` and numpy.nextafter
        # would then return ``x`` itself instead of a neighbor.
        target = -numpy.inf if direction < 0 else numpy.inf
        return numpy.nextafter(x, target)

    raise TypeError("data type ``%s`` is not supported" % dtype)
def make_strictly_feasible(x, lb, ub, rstep=1e-10):
    """Move a point off its active bounds, into the interior of ``[lb, ub]``.

    Components flagged by `find_active_constraints` as sitting on the lower
    bound (flag ``-1``) or the upper bound (flag ``1``) are pushed inward.
    With a non-zero `rstep` the push is ``rstep * max(1, |bound|)``; with
    ``rstep=0`` `np.nextafter` yields the adjacent float towards the
    opposite bound. Any component that ends up outside the bounds after
    the push is replaced by the midpoint of its bounds.

    Parameters
    ----------
    x, lb, ub : ndarray
        Point and its lower/upper bounds; `x` is not modified.
    rstep : float, optional
        Relative shift size, also forwarded to `find_active_constraints`.

    Returns
    -------
    ndarray
        Shifted copy of `x`.
    """
    shifted = x.copy()

    status = find_active_constraints(x, lb, ub, rstep)
    at_lower = np.equal(status, -1)
    at_upper = np.equal(status, 1)

    if rstep != 0:
        lower_vals = lb[at_lower]
        upper_vals = ub[at_upper]
        shifted[at_lower] = (
            lower_vals + rstep * np.maximum(1, np.abs(lower_vals)))
        shifted[at_upper] = (
            upper_vals - rstep * np.maximum(1, np.abs(upper_vals)))
    else:
        shifted[at_lower] = np.nextafter(lb[at_lower], ub[at_lower])
        shifted[at_upper] = np.nextafter(ub[at_upper], lb[at_upper])

    # The push can overshoot when the two bounds are very close together;
    # fall back to the midpoint for those components.
    outside = (shifted < lb) | (shifted > ub)
    shifted[outside] = 0.5 * (lb[outside] + ub[outside])

    return shifted
def __init__(self, value_1, value_2=None): # use Decimal as exact value holder (because of arbitrary precision) from decimal import Decimal # nextafter(x, y) returns next machine number after x in direction of y from numpy import nextafter # creating interval from middle value if not value_2: exact = Decimal(value_1) float_repr = Decimal("{0:0.70f}".format(float(exact))) if exact == float_repr: self.lv = float(float_repr) self.rv = float(float_repr) elif exact > float_repr: self.lv = float(float_repr) self.rv = nextafter(self.lv, float('Inf')) elif exact < float_repr: self.rv = float(float_repr) self.lv = nextafter(self.rv, -float('Inf')) # creating interval from left and right edge else: exact_left = Decimal(value_1) exact_right = Decimal(value_2) if exact_left > exact_right: exact_left, exact_right = exact_right, exact_left float_repr_left = Decimal(float(exact_left)) float_repr_right = Decimal(float(exact_right)) if exact_left < float_repr_left: self.lv = nextafter(float(float_repr_left), -float('Inf')) else: self.lv = float(float_repr_left) if exact_right > float_repr_right: self.rv = nextafter(float(float_repr_right), float('Inf')) else: self.rv = float(float_repr_right)
def _logpmf(self, x, mu, alpha, p): mu_p = mu ** (p - 1.) a1 = np.maximum(np.nextafter(0, 1), 1 + alpha * mu_p) a2 = np.maximum(np.nextafter(0, 1), mu + (a1 - 1.) * x) logpmf_ = np.log(mu) + (x - 1.) * np.log(a2) logpmf_ -= x * np.log(a1) + gammaln(x + 1.) + a2 / a1 return logpmf_
def make_strictly_feasible(x, lb, ub, rstep=0):
    """Shift the point in the slightest possible way to the interior.

    Components of `x` lying on or below `lb` are moved just above the
    lower bound, components on or above `ub` just below the upper bound.
    If ``rstep=0`` the shift is one machine number (`np.nextafter` towards
    the opposite bound), otherwise it is ``rstep * (1 + |bound|)``.

    When the two bounds are closer together than the shift, the shifted
    value would leave the feasible region; such components are placed at
    the midpoint of their bounds instead.

    Parameters
    ----------
    x, lb, ub : ndarray
        Point and its lower/upper bounds; `x` is not modified.
    rstep : float, optional
        Relative shift size; ``0`` selects the `np.nextafter` shift.

    Returns
    -------
    ndarray
        Shifted copy of `x`.
    """
    x_new = x.copy()

    at_lower = x <= lb
    at_upper = x >= ub

    if rstep == 0:
        x_new[at_lower] = np.nextafter(lb[at_lower], ub[at_lower])
        x_new[at_upper] = np.nextafter(ub[at_upper], lb[at_upper])
    else:
        x_new[at_lower] = lb[at_lower] + rstep * (1 + np.abs(lb[at_lower]))
        x_new[at_upper] = ub[at_upper] - rstep * (1 + np.abs(ub[at_upper]))

    # A shift larger than the gap between the bounds overshoots the
    # opposite bound; the midpoint is the best remaining choice.
    overshoot = (x_new < lb) | (x_new > ub)
    x_new[overshoot] = 0.5 * (lb[overshoot] + ub[overshoot])

    return x_new
def test_half_fpe(self):
    """Check which float16 operations signal floating point errors.

    All numpy error categories are switched to "raise" for the duration of
    the test and restored afterwards. A table of operations is then
    expected to trip the named error (checked via `assert_raises_fpe`),
    and a final group of near-limit operations must complete silently.
    """
    def mul(a, b):
        return a * b

    def div(a, b):
        return a / b

    def add(a, b):
        return a + b

    def sub(a, b):
        return a - b

    with np.errstate(all="raise"):
        sx16 = np.array((1e-4,), dtype=float16)
        bx16 = np.array((1e4,), dtype=float16)
        sy16 = float16(1e-4)
        by16 = float16(1e4)
        small = (sx16, sy16)
        big = (bx16, by16)

        cases = []

        # Underflow errors
        cases += [("underflow", mul, (a, b)) for a in small for b in small]
        cases += [("underflow", div, (a, b)) for a in small for b in big]
        cases += [
            ("underflow", div, (float16(2.0 ** -14), float16(2 ** 11))),
            ("underflow", div, (float16(-2.0 ** -14), float16(2 ** 11))),
            ("underflow", div,
             (float16(2.0 ** -14 + 2 ** -24), float16(2))),
            ("underflow", div,
             (float16(-2.0 ** -14 - 2 ** -24), float16(2))),
            ("underflow", div,
             (float16(2.0 ** -14 + 2 ** -23), float16(4))),
        ]

        # Overflow errors
        cases += [("overflow", mul, (a, b)) for a in big for b in big]
        cases += [("overflow", div, (a, b)) for a in big for b in small]
        cases += [
            ("overflow", add, (float16(65504), float16(17))),
            ("overflow", sub, (float16(-65504), float16(17))),
            ("overflow", np.nextafter, (float16(65504), float16(np.inf))),
            ("overflow", np.nextafter, (float16(-65504), float16(-np.inf))),
            ("overflow", np.spacing, (float16(65504),)),
        ]

        # Invalid value errors
        cases += [
            ("invalid", np.divide, (float16(np.inf), float16(np.inf))),
            ("invalid", np.spacing, (float16(np.inf),)),
            ("invalid", np.spacing, (float16(np.nan),)),
            ("invalid", np.nextafter, (float16(np.inf), float16(0))),
            ("invalid", np.nextafter, (float16(-np.inf), float16(0))),
            ("invalid", np.nextafter, (float16(0), float16(np.nan))),
        ]

        for flag, operation, operands in cases:
            assert_raises_fpe(flag, operation, *operands)

        # These should not raise
        float16(65472) + float16(32)
        float16(2 ** -13) / float16(2)
        float16(2 ** -14) / float16(2 ** 10)
        np.spacing(float16(-65504))
        np.nextafter(float16(65504), float16(-np.inf))
        np.nextafter(float16(-65504), float16(np.inf))
        float16(2 ** -14) / float16(2 ** 10)
        float16(-2 ** -14) / float16(2 ** 10)
        float16(2 ** -14 + 2 ** -23) / float16(2)
        float16(-2 ** -14 - 2 ** -23) / float16(2)
def forward_cpu(self, inputs):
    """Bilinearly interpolate ``U`` at the sampling locations in ``points``.

    ``inputs`` is a pair ``(U, points)``. ``U.shape`` is unpacked as
    ``(batch_size, height, width)``. ``points[:, 0]`` is used as the x
    coordinate and ``points[:, 1]`` as the y coordinate.
    NOTE(review): the indexing suggests ``points`` is laid out as
    (batch, 2, n_points) -- confirm against the caller.

    The integer neighbor indices and the four interpolation weights are
    stored on ``self`` (``x_l``, ``y_l``, ``x_h``, ``y_h``, ``weight_*``),
    presumably for reuse by the backward pass.

    Returns a 1-tuple holding the interpolated values ``V``.
    """
    U, points = inputs
    batch_size, height, width = U.shape

    # Points just on the boundary are slightly (i.e. nextafter in float32)
    # moved inward to simplify the implementation
    points = points.copy()  # work on a copy so the caller's array is untouched
    on_boundary = (points == 0)
    points[on_boundary] = np.nextafter(points[on_boundary], np.float32(1))
    # x and y are slices of the copied array, so the in-place fixes below
    # also update ``points`` (which is floored further down).
    x = points[:, 0]
    y = points[:, 1]
    on_boundary = (x == (width - 1))
    x[on_boundary] = np.nextafter(x[on_boundary], np.float32(0))
    on_boundary = (y == (height - 1))
    y[on_boundary] = np.nextafter(y[on_boundary], np.float32(0))

    # Column vector of batch indices; broadcasts against the index arrays
    # when gathering from U.
    batch_axis = np.expand_dims(np.arange(batch_size), 1)

    # Lower ("_l") and upper ("_h") integer neighbors, clipped so that even
    # out-of-range points produce valid indices.
    points_floor = np.floor(points)
    x_l = points_floor[:, 0].astype(np.int32)
    y_l = points_floor[:, 1].astype(np.int32)
    x_l = np.clip(x_l, 0, width - 1)
    y_l = np.clip(y_l, 0, height - 1)
    x_h = np.clip(x_l + 1, 0, width - 1)
    y_h = np.clip(y_l + 1, 0, height - 1)

    # Weight of the lower neighbor is one minus the fractional part; the
    # upper neighbor gets the remainder. The "_h" weights are computed
    # before any zeroing, so they have to be zeroed separately below.
    weight = 1.0 - (points - points_floor)
    weight_x_l = weight[:, 0]
    weight_y_l = weight[:, 1]
    weight_x_h = 1 - weight_x_l
    weight_y_h = 1 - weight_y_l

    # remove points outside of the (source) image region
    # by setting their weights to 0
    x_invalid = np.logical_or(x < 0, (width - 1) < x)
    y_invalid = np.logical_or(y < 0, (height - 1) < y)
    invalid = np.logical_or(x_invalid, y_invalid)
    weight_x_l[invalid] = 0
    weight_y_l[invalid] = 0
    weight_x_h[invalid] = 0
    weight_y_h[invalid] = 0

    # Interpolate along x on the lower and upper rows, then along y.
    U_y_l = (weight_x_l * U[batch_axis, y_l, x_l] +
             weight_x_h * U[batch_axis, y_l, x_h])
    U_y_h = (weight_x_l * U[batch_axis, y_h, x_l] +
             weight_x_h * U[batch_axis, y_h, x_h])
    V = weight_y_l * U_y_l + weight_y_h * U_y_h

    self.x_l = x_l
    self.y_l = y_l
    self.x_h = x_h
    self.y_h = y_h
    self.weight_x_l = weight_x_l
    self.weight_y_l = weight_y_l
    self.weight_x_h = weight_x_h
    self.weight_y_h = weight_y_h
    return (V,)
def _test_nextafter(t): one = t(1) two = t(2) zero = t(0) eps = np.finfo(t).eps assert_(np.nextafter(one, two) - one == eps) assert_(np.nextafter(one, zero) - one < 0) assert_(np.isnan(np.nextafter(np.nan, one))) assert_(np.isnan(np.nextafter(one, np.nan))) assert_(np.nextafter(one, one) == one)
def test_nextafter():
    """Check basic `np.nextafter` properties for float32, float64 and longdouble.

    For each type: the step from one towards two equals the type's machine
    epsilon, the step towards zero is negative, NaN in either argument
    yields NaN, and stepping towards the value itself is a no-op.
    """
    def check(float_type):
        zero, one, two = float_type(0), float_type(1), float_type(2)
        assert np.nextafter(one, two) - one == np.finfo(float_type).eps
        assert np.nextafter(one, zero) - one < 0
        assert np.isnan(np.nextafter(np.nan, one))
        assert np.isnan(np.nextafter(one, np.nan))
        assert np.nextafter(one, one) == one

    for float_type in (np.float32, np.float64, np.longdouble):
        check(float_type)
def ranges_to_weight_table(ranges):
    """
    Build a one-row table of weights from weighted ranges.

    Only the edge points of the ranges are stored. Every edge appears
    twice: once carrying the weight of the range, and once as the adjacent
    float just outside the range carrying zero, so that the weights can be
    interpolated directly. Where ranges overlap their weights are summed.
    Assumes 64-bit floats.

    :param ranges: list of triples (edge1, edge2, weight); the edges may
        be given in either order
    :return: an Orange.data.Table whose variables are named after the
        positions
    """
    def as_arrays(mapping):
        # Keys become a 1-D array, values a single-row 2-D array.
        return np.array(list(mapping)), np.array([list(mapping.values())])

    neg_inf, pos_inf = float("-inf"), float("inf")
    weight_at = {}

    for edge_a, edge_b, weight in ranges:
        lo, hi = min(edge_a, edge_b), max(edge_a, edge_b)
        knots = [nextafter(lo, neg_inf), lo, hi, nextafter(hi, pos_inf)]
        w = float(weight)
        knot_weights = [0., w, w, 0.]

        # positions of this range together with all earlier ones
        grid = list(set(knots) | set(weight_at))

        # weights accumulated so far, evaluated on the merged grid
        known_x, known_y = as_arrays(weight_at)
        existing = interp1d_with_unknowns_numpy(known_x, known_y, grid)[0]
        existing[np.isnan(existing)] = 0

        # contribution of the current range on the same grid
        added = interp1d_with_unknowns_numpy(
            np.array(knots), np.array([knot_weights]), grid)[0]
        added[np.isnan(added)] = 0

        weight_at.update(zip(grid, existing + added))

    positions, weights = as_arrays(weight_at)
    domain = Orange.data.Domain(
        [Orange.data.ContinuousVariable(name=str(float(a)))
         for a in positions])
    return Orange.data.Table.from_numpy(domain, weights)
def test_half_conversion_rounding(self, float_t, shift, offset): # Assumes that round to even is used during casting. max_pattern = np.float16(np.finfo(np.float16).max).view(np.uint16) # Test all (positive) finite numbers, denormals are most interesting # however: f16s_patterns = np.arange(0, max_pattern+1, dtype=np.uint16) f16s_float = f16s_patterns.view(np.float16).astype(float_t) # Shift the values by half a bit up or a down (or do not shift), if shift == "up": f16s_float = 0.5 * (f16s_float[:-1] + f16s_float[1:])[1:] elif shift == "down": f16s_float = 0.5 * (f16s_float[:-1] + f16s_float[1:])[:-1] else: f16s_float = f16s_float[1:-1] # Increase the float by a minimal value: if offset == "up": f16s_float = np.nextafter(f16s_float, float_t(1e50)) elif offset == "down": f16s_float = np.nextafter(f16s_float, float_t(-1e50)) # Convert back to float16 and its bit pattern: res_patterns = f16s_float.astype(np.float16).view(np.uint16) # The above calculations tries the original values, or the exact # mid points between the float16 values. It then further offsets them # by as little as possible. If no offset occurs, "round to even" # logic will be necessary, an arbitrarily small offset should cause # normal up/down rounding always. # Calculate the expecte pattern: cmp_patterns = f16s_patterns[1:-1].copy() if shift == "down" and offset != "up": shift_pattern = -1 elif shift == "up" and offset != "down": shift_pattern = 1 else: # There cannot be a shift, either shift is None, so all rounding # will go back to original, or shift is reduced by offset too much. shift_pattern = 0 # If rounding occurs, is it normal rounding or round to even? if offset is None: # Round to even occurs, modify only non-even, cast to allow + (-1) cmp_patterns[0::2].view(np.int16)[...] += shift_pattern else: cmp_patterns.view(np.int16)[...] += shift_pattern assert_equal(res_patterns, cmp_patterns)
def compute_likelihoods(self, PLCs, FLCs):
    """Return one mixed likelihood per row for the given PLCs and FLCs.

    The per-state densities of `PLCs` are weighted by the column sums of
    ``self.W`` divided by ``self.N()``, normalized so that each row sums
    to one, multiplied element-wise with the conditional densities of
    `FLCs`, and summed over states. Every intermediate array is passed
    through ``np.nextafter(..., 1.)``, which moves each entry one float
    towards 1.
    """
    n_states = self.K()
    n_samples = self.N()

    future_given_state_probs = np.nextafter(
        self.f_hat_conditional_densities(FLCs, label="PDF_FLCs"), 1.)

    per_state = [self.PLC_densities(j, PLCs) for j in range(n_states)]
    state_given_past_probs = np.nextafter(np.vstack(per_state), 1.).T

    # Weight by state likelihood
    state_given_past_probs *= self.W.sum(axis=0) / n_samples
    state_given_past_probs = np.nextafter(state_given_past_probs, 1.)

    # Normalize
    row_totals = np.sum(state_given_past_probs, axis=1)
    state_given_past_probs /= np.expand_dims(row_totals, axis=1)

    # Return mixed likelihoods
    mixed = np.sum(
        np.multiply(state_given_past_probs, future_given_state_probs),
        axis=1)
    return np.nextafter(mixed, 1.)
def testBearingToValueOnEquator(self):
    """Test if bearingTo() returns the expected value from a point on the
    equator.

    Each trial gives an origin (``lon``, ``lat``), an end point
    (``lonEnd``, ``latEnd``) and the expected ``bearing``, all in degrees.
    An expected bearing of ``nan`` means the computed bearing must be NaN.
    """
    lon0 = 90.0
    lat0 = 0.0   # These tests only work from the equator.
    arcLen = 10.0

    trials = [
        # Along celestial equator
        dict(lon=lon0, lat=lat0, bearing=0.0,
             lonEnd=lon0+arcLen, latEnd=lat0),
        # Along a meridian
        dict(lon=lon0, lat=lat0, bearing=90.0,
             lonEnd=lon0, latEnd=lat0+arcLen),
        # 180 degree arc (should go to antipodal point)
        dict(lon=lon0, lat=lat0, bearing=45.0,
             lonEnd=lon0+180.0, latEnd=-lat0),
        # End points a quarter turn away in longitude, and origins just
        # above the south pole
        dict(lon=lon0, lat=lat0, bearing=45.0,
             lonEnd=lon0+90.0, latEnd=lat0 + 45.0),
        dict(lon=lon0, lat=lat0, bearing=225.0,
             lonEnd=lon0-90.0, latEnd=lat0 - 45.0),
        dict(lon=lon0, lat=np.nextafter(-90.0, inf),
             bearing=90.0, lonEnd=lon0, latEnd=0.0),
        dict(lon=lon0, lat=np.nextafter(-90.0, inf),
             bearing=0.0, lonEnd=lon0 + 90.0, latEnd=0.0),
        # Argument at a pole should work
        dict(lon=lon0, lat=lat0, bearing=270.0, lonEnd=lon0, latEnd=-90.0),
        # Support for non-finite values
        dict(lon=lon0, lat=nan, bearing=nan, lonEnd=lon0, latEnd=45.0),
        dict(lon=lon0, lat=lat0, bearing=nan, lonEnd=nan, latEnd=90.0),
        dict(lon=inf, lat=lat0, bearing=nan, lonEnd=lon0, latEnd=42.0),
        dict(lon=lon0, lat=lat0, bearing=nan, lonEnd=-inf, latEnd=42.0),
    ]

    for trial in trials:
        origin = SpherePoint(trial['lon']*degrees, trial['lat']*degrees)
        end = SpherePoint(trial['lonEnd']*degrees, trial['latEnd']*degrees)
        bearing = origin.bearingTo(end)

        self.assertIsInstance(bearing, geom.Angle)
        # The [0, 360) range is only required for finite end points.
        if origin.isFinite() and end.isFinite():
            self.assertGreaterEqual(bearing.asDegrees(), 0.0)
            self.assertLess(bearing.asDegrees(), 360.0)
        # The value is not checked for points exactly 180 degrees apart
        # (presumably because the bearing to the antipode is not unique).
        if origin.separation(end).asDegrees() != 180.0:
            if not math.isnan(trial['bearing']):
                self.assertAlmostEqual(
                    trial['bearing'], bearing.asDegrees(), 12)
            else:
                self.assertTrue(math.isnan(bearing.asRadians()))
def similarity(v1, v2):
    """Return the dot product of vectors `v1` and `v2` divided by both norms.

    Each norm is clamped from below at the smallest positive float, so a
    zero vector gives a similarity of zero instead of a division by zero.
    """
    floor = np.nextafter(0, 1)  # smallest float above zero

    result = np.dot(v1, v2)
    for vec in (v1, v2):
        result /= max(npext.norm(vec), floor)
    return result
def test_to_corr(self): # Check some corner cases in to_corr # ajj == 1 m = np.array([[0.1, 0], [0, 1]], dtype=float) m = random_correlation._to_corr(m) assert_allclose(m, np.array([[1, 0], [0, 0.1]])) # Floating point overflow; fails to compute the correct # rotation, but should still produce some valid rotation # rather than infs/nans with np.errstate(over='ignore'): g = np.array([[0, 1], [-1, 0]]) m0 = np.array([[1e300, 0], [0, np.nextafter(1, 0)]], dtype=float) m = random_correlation._to_corr(m0.copy()) assert_allclose(m, g.T.dot(m0).dot(g)) m0 = np.array([[0.9, 1e300], [1e300, 1.1]], dtype=float) m = random_correlation._to_corr(m0.copy()) assert_allclose(m, g.T.dot(m0).dot(g)) # Zero discriminant; should set the first diag entry to 1 m0 = np.array([[2, 1], [1, 2]], dtype=float) m = random_correlation._to_corr(m0.copy()) assert_allclose(m[0,0], 1) # Slightly negative discriminant; should be approx correct still m0 = np.array([[2 + 1e-7, 1], [1, 2]], dtype=float) m = random_correlation._to_corr(m0.copy()) assert_allclose(m[0,0], 1)
def sample_n(self, n, seed=None, name="sample_n"):
    """Sample `n` observations from the Laplace Distributions.

    Uniform samples are drawn with ``minval`` set to the float just above
    -1 and ``maxval`` 1, then mapped through
    ``loc - scale * sign(u) * log(1 - |u|)``.

    Args:
      n: `Scalar`, type int32, the number of observations to sample.
      seed: Python integer, the random seed.
      name: The name to give this op.

    Returns:
      samples: `[n, ...]`, a `Tensor` of `n` samples for each
        of the distributions determined by broadcasting the parameters.
    """
    with ops.name_scope(self.name), \
            ops.name_scope(name, values=[self._loc, self._scale, n]):
        n = ops.convert_to_tensor(n)
        n_val = tensor_util.constant_value(n)
        sample_shape = array_ops.concat(0, ([n], self.batch_shape()))

        # Sample uniformly-at-random from the open-interval (-1, 1).
        np_dtype = self.dtype.as_numpy_dtype
        lower = np.nextafter(np_dtype(-1.), np_dtype(0.))
        upper = np_dtype(1.)
        uniform_samples = random_ops.random_uniform(
            shape=sample_shape,
            minval=lower,
            maxval=upper,
            dtype=self.dtype,
            seed=seed)

        # Provide some hints to shape inference
        static_shape = tensor_shape.vector(n_val).concatenate(
            self.get_batch_shape())
        uniform_samples.set_shape(static_shape)

        signed_scale = self._scale * math_ops.sign(uniform_samples)
        log_term = math_ops.log(1. - math_ops.abs(uniform_samples))
        return (self._loc - signed_scale * log_term)
def _compute_lwork(routine, *args, **kwargs): """ Round floating-point lwork returned by lapack to integer. Several LAPACK routines compute optimal values for LWORK, which they return in a floating-point variable. However, for large values of LWORK, single-precision floating point is not sufficient to hold the exact value --- some LAPACK versions (<= 3.5.0 at least) truncate the returned integer to single precision and in some cases this can be smaller than the required value. """ lwork, info = routine(*args, **kwargs) if info != 0: raise ValueError("Internal work array size computation failed: %d" % (info,)) lwork = lwork.real if getattr(routine, "dtype", None) == _np.float32: # Single-precision routine -- take next fp value to work # around possible truncation in LAPACK code lwork = _np.nextafter(_np.float32(lwork), _np.float32(_np.inf)) lwork = int(lwork) if lwork < 0 or lwork > _np.iinfo(_np.int32).max: raise ValueError( "Too large work array required -- computation cannot " "be performed with standard 32-bit LAPACK." ) return lwork
def _addMinMaxToStyle(theStyle): """Add a min and max to each style class in a style dictionary. When InaSAFE provides style classes they are specific values, not ranges. However QGIS wants to work in ranges, so this helper will address that by updating the dictionary to include a min max value for each class. It is assumed that we will start for 0 as the min for the first class and the quantity of each class shall constitute the max. For all other classes , min shall constitute the smalles increment to a float that can meaningfully be made by python (as determined by numpy.nextafter()). Args: style: list - A list of dictionaries of the form as in the example below. Returns: dict: A new dictionary list with min max attributes added to each entry. Example input: style_classes = [dict(colour='#38A800', quantity=2, transparency=0), dict(colour='#38A800', quantity=5, transparency=50), dict(colour='#79C900', quantity=10, transparency=50), dict(colour='#CEED00', quantity=20, transparency=50), dict(colour='#FFCC00', quantity=50, transparency=34), dict(colour='#FF6600', quantity=100, transparency=77), dict(colour='#FF0000', quantity=200, transparency=24), dict(colour='#7A0000', quantity=300, transparency=22)] Example output: style_classes = [dict(colour='#38A800', quantity=2, transparency=0, min=0, max=2), dict(colour='#38A800', quantity=5, transparency=50, min=2.0000000000002, max=5), ), dict(colour='#79C900', quantity=10, transparency=50, min=5.0000000000002, max=10),), dict(colour='#CEED00', quantity=20, transparency=50, min=5.0000000000002, max=20),), dict(colour='#FFCC00', quantity=50, transparency=34, min=20.0000000000002, max=50),), dict(colour='#FF6600', quantity=100, transparency=77, min=50.0000000000002, max=100),), dict(colour='#FF0000', quantity=200, transparency=24, min=100.0000000000002, max=200),), dict(colour='#7A0000', quantity=300, transparency=22, min=200.0000000000002, max=300),)] """ myNewStyles = [] myLastMax = 0.0 for myClass in theStyle: myQuantity 
= float(myClass['quantity']) myClass['min'] = myLastMax myClass['max'] = myQuantity myLastMax = numpy.nextafter(myQuantity, sys.float_info.max) myNewStyles.append(myClass) return myNewStyles
def subscribe(self, t=None):
    """Subscribe to events after t.

    Without `t`, a snapshot is returned: for every key in ``util.state``
    the latest ``(time, value)`` entry, reported as ``(key, val, t)``
    triples, together with the time of the newest entry in ``deltas``
    (0 when ``deltas`` is empty).

    With `t`, the entries of ``deltas`` strictly newer than `t` are
    returned if there are any; otherwise the call waits on
    ``deltas_change`` for up to 25 seconds.

    Returns a dict with keys ``'t'`` and ``'deltas'``.

    NOTE(review): ``deltas`` is read as a time-sorted list of
    ``(t, key, val)`` tuples and ``deltas_change`` looks like a
    ``threading.Condition`` -- confirm against the module globals.
    """
    if t is None:
        i = len(deltas)
        cur = dict((key, util.state.history(key)[-1]) for key in util.state)
        j = len(deltas)
        prefix = []
        #hide race conditions
        # Entries appended to deltas while the snapshot was being taken
        # replace snapshot values that are older than them.
        for t, key, val in deltas[i:j]:
            if t > cur[key][0]:
                cur[key] = t, val
        for key in cur:
            t, val = cur[key]
            prefix.append((key, val, t))
        t = 0
        if j:
            t = deltas[j-1][0]
        return {'t':t, 'deltas':prefix}
    t = float(t)
    while True:#completely different logic
        if deltas and deltas[-1][0] > t:
            # Search for the first float above t so that entries stamped
            # exactly t are skipped.
            # NOTE(review): t+1 == t for very large t, which would make
            # nextafter return t itself -- confirm t stays small enough.
            i = bisect.bisect_left(deltas, (numpy.nextafter(t, t+1),))
            j = len(deltas)
            return {'t':deltas[j-1][0], 'deltas':[(key, val, t) for (t, key, val) in deltas[i:j]]}
        try:
            deltas_change.acquire()
            deltas_change.wait(25)
        finally:
            deltas_change.release()
        # NOTE(review): the nesting of this return could not be recovered
        # from the source with certainty. Placed here it ends the loop
        # after a single wait, without re-checking deltas -- confirm.
        return {'t':t, 'deltas':[]}
def _sample_n(self, n, seed=None):
    """Draw `n` samples by transforming uniform random values.

    Uniform values are drawn with ``minval`` set to the smallest positive
    float of the distribution's dtype (so the logarithms below never see
    zero) and mapped through ``-log(-log(u))``, then scaled by
    ``self.scale`` and shifted by ``self.loc``.

    Args:
      n: number of samples; becomes the leading dimension of the result.
      seed: Python integer, the random seed.

    Returns:
      A `Tensor` of shape ``[n] + shape(self.mean())``.
    """
    shape = array_ops.concat(0, ([n], array_ops.shape(self.mean())))

    # `as_numpy_dtype` is used as a property yielding the numpy scalar
    # type (as in the other samplers of this file). Calling it with no
    # arguments would create a scalar instance, which cannot then be used
    # as a constructor.
    np_dtype = self.dtype.as_numpy_dtype
    minval = np.nextafter(np_dtype(0), np_dtype(1))

    uniform = random_ops.random_uniform(shape=shape,
                                        minval=minval,
                                        maxval=1,
                                        dtype=self.dtype,
                                        seed=seed)
    sampled = -math_ops.log(-math_ops.log(uniform))
    return sampled * self.scale + self.loc
def test_nextafter_vs_spacing():
    """Check that one `np.nextafter` step upward equals `np.spacing`.

    Tried for float32 and float64 at the values 1, 1e-5 and 1000.
    """
    # XXX: spacing does not handle long double yet
    cases = [(float_type, raw)
             for float_type in (np.float32, np.float64)
             for raw in (1, 1e-5, 1000)]
    for float_type, raw in cases:
        value = float_type(raw)
        target = float_type(raw + 1)
        assert_(np.nextafter(value, target) - value == np.spacing(value))
def _compute_lwork(routine, *args, **kwargs): """ Round floating-point lwork returned by lapack to integer. Several LAPACK routines compute optimal values for LWORK, which they return in a floating-point variable. However, for large values of LWORK, single-precision floating point is not sufficient to hold the exact value --- some LAPACK versions (<= 3.5.0 at least) truncate the returned integer to single precision and in some cases this can be smaller than the required value. """ wi = routine(*args, **kwargs) if len(wi) < 2: raise ValueError("") info = wi[-1] if info != 0: raise ValueError("Internal work array size computation failed: " "%d" % (info,)) lwork = [w.real for w in wi[:-1]] dtype = getattr(routine, "dtype", None) if dtype == _np.float32 or dtype == _np.complex64: # Single-precision routine -- take next fp value to work # around possible truncation in LAPACK code lwork = _np.nextafter(lwork, _np.inf, dtype=_np.float32) lwork = _np.array(lwork, _np.int64) if _np.any(_np.logical_or(lwork < 0, lwork > _np.iinfo(_np.int32).max)): raise ValueError( "Too large work array required -- computation cannot " "be performed with standard 32-bit LAPACK." ) lwork = lwork.astype(_np.int32) if lwork.size == 1: return lwork[0] return lwork
def test_float_modulus_corner_cases(self): # Check remainder magnitude. for dt in np.typecodes['Float']: b = np.array(1.0, dtype=dt) a = np.nextafter(np.array(0.0, dtype=dt), -b) rem = self.mod(a, b) assert_(rem <= b, 'dt: %s' % dt) rem = self.mod(-a, -b) assert_(rem >= -b, 'dt: %s' % dt) # Check nans, inf with warnings.catch_warnings(): warnings.simplefilter('always') warnings.simplefilter('ignore', RuntimeWarning) for dt in np.typecodes['Float']: fone = np.array(1.0, dtype=dt) fzer = np.array(0.0, dtype=dt) finf = np.array(np.inf, dtype=dt) fnan = np.array(np.nan, dtype=dt) rem = self.mod(fone, fzer) assert_(np.isnan(rem), 'dt: %s' % dt) # MSVC 2008 returns NaN here, so disable the check. #rem = self.mod(fone, finf) #assert_(rem == fone, 'dt: %s' % dt) rem = self.mod(fone, fnan) assert_(np.isnan(rem), 'dt: %s' % dt) rem = self.mod(finf, fone) assert_(np.isnan(rem), 'dt: %s' % dt)
def sample_n(self, n, seed=None, name="sample_n"):
    """Sample `n` observations from the Exponential Distributions.

    Uniform samples are drawn with ``minval`` set to the smallest positive
    float of the distribution's dtype and mapped through
    ``-log(u) / lam``.

    Args:
      n: `Scalar`, type int32, the number of observations to sample.
      seed: Python integer, the random seed.
      name: The name to give this op.

    Returns:
      samples: `[n, ...]`, a `Tensor` of `n` samples for each
        of the distributions determined by the hyperparameters.
    """
    lam_static_shape = self._lam.get_shape()
    with ops.op_scope([self.lam, n], name, "ExponentialSample"):
        n = ops.convert_to_tensor(n, name="n")
        sample_shape = array_ops.concat(
            0, [array_ops.pack([n]), array_ops.shape(self._lam)])

        # Sample uniformly-at-random from the open-interval (0, 1).
        np_dtype = self.dtype.as_numpy_dtype
        lower = np.nextafter(np_dtype(0.), np_dtype(1.))
        upper = constant_op.constant(1.0, dtype=self.dtype)
        sampled = random_ops.random_uniform(
            sample_shape,
            minval=lower,
            maxval=upper,
            seed=seed,
            dtype=self.dtype)

        # Attach the statically known shape to the samples.
        n_val = tensor_util.constant_value(n)
        sampled.set_shape(
            tensor_shape.vector(n_val).concatenate(lam_static_shape))

        return -math_ops.log(sampled) / self._lam
def test_float_modulus_corner_cases(self): # Check remainder magnitude. for dt in np.typecodes['Float']: b = np.array(1.0, dtype=dt) a = np.nextafter(np.array(0.0, dtype=dt), -b) rem = self.mod(a, b) assert_(rem <= b, 'dt: %s' % dt) rem = self.mod(-a, -b) assert_(rem >= -b, 'dt: %s' % dt) # Check nans, inf with suppress_warnings() as sup: sup.filter(RuntimeWarning, "invalid value encountered in remainder") for dt in np.typecodes['Float']: fone = np.array(1.0, dtype=dt) fzer = np.array(0.0, dtype=dt) finf = np.array(np.inf, dtype=dt) fnan = np.array(np.nan, dtype=dt) rem = self.mod(fone, fzer) assert_(np.isnan(rem), 'dt: %s' % dt) # MSVC 2008 returns NaN here, so disable the check. #rem = self.mod(fone, finf) #assert_(rem == fone, 'dt: %s' % dt) rem = self.mod(fone, fnan) assert_(np.isnan(rem), 'dt: %s' % dt) rem = self.mod(finf, fone) assert_(np.isnan(rem), 'dt: %s' % dt)
def _compute_mi_cc(x, y, n_neighbors):
    """Estimate the mutual information of two continuous variables.

    Parameters
    ----------
    x, y : ndarray, shape (n_samples,)
        Samples of two continuous random variables, must have an
        identical shape.

    n_neighbors : int
        Number of nearest neighbors to search for each point, see [1]_.

    Returns
    -------
    mi : float
        Estimated mutual information. A negative estimate is replaced
        by 0.

    Notes
    -----
    True mutual information can't be negative. A negative numerical
    estimate means (provided the method is adequate) that the mutual
    information is close to 0, so clamping it to 0 is a reasonable
    strategy.

    References
    ----------
    .. [1] A. Kraskov, H. Stogbauer and P. Grassberger, "Estimating mutual
           information". Phys. Rev. E 69, 2004.
    """
    n_samples = x.size

    x = x.reshape((-1, 1))
    y = y.reshape((-1, 1))
    joint = np.hstack((x, y))

    # Here we rely on NearestNeighbors to select the fastest algorithm.
    nn = NearestNeighbors(metric='chebyshev', n_neighbors=n_neighbors)
    nn.fit(joint)
    neighbor_dist = nn.kneighbors()[0]
    # Distance to the farthest requested neighbor, moved one float
    # towards zero.
    radius = np.nextafter(neighbor_dist[:, -1], 0)

    # Algorithm is selected explicitly to allow passing an array as radius
    # later (not all algorithms support this).
    nn.set_params(algorithm='kd_tree')

    def count_within_radius(samples):
        # Per point: number of neighbors found within its own radius.
        nn.fit(samples)
        found = nn.radius_neighbors(radius=radius, return_distance=False)
        return np.array([idx.size for idx in found])

    nx = count_within_radius(x)
    ny = count_within_radius(y)

    mi = (digamma(n_samples) + digamma(n_neighbors) -
          np.mean(digamma(nx + 1)) - np.mean(digamma(ny + 1)))

    return max(0, mi)
def _step_impl(self):
    """Attempt one step, shrinking the step size until the error is accepted.

    The trial step is clamped to ``[min_step, max_step]`` and never goes
    past ``self.t_bound``. After every `rk_step` the scaled error norm
    decides whether the step is accepted and how the step size is changed
    for the next attempt.

    Returns
    -------
    (bool, message)
        ``(True, None)`` after an accepted step, in which case ``t``, ``y``,
        ``y_old``, ``h_abs`` and ``f`` on ``self`` are updated.
        ``(False, self.TOO_SMALL_STEP)`` if the step size drops below the
        minimum; the state is then left unchanged.
    """
    t, y = self.t, self.y
    rtol, atol = self.rtol, self.atol
    order = self.order

    # Ten times the distance from t to the adjacent float in the
    # direction of integration.
    min_step = 10 * np.abs(np.nextafter(t, self.direction * np.inf) - t)

    h_abs = self.h_abs
    if h_abs > self.max_step:
        h_abs = self.max_step
    elif h_abs < min_step:
        h_abs = min_step

    while True:
        if h_abs < min_step:
            return False, self.TOO_SMALL_STEP

        t_new = t + h_abs * self.direction
        if self.direction * (t_new - self.t_bound) > 0:
            t_new = self.t_bound

        h = t_new - t
        h_abs = np.abs(h)

        y_new, f_new, error = rk_step(self.fun, t, y, self.f, h, self.A,
                                      self.B, self.C, self.E, self.K)
        scale = atol + np.maximum(np.abs(y), np.abs(y_new)) * rtol
        error_norm = norm(error / scale)

        if error_norm == 0.0:
            factor = MAX_FACTOR
            accepted = True
        elif error_norm < 1:
            factor = min(MAX_FACTOR,
                         max(1, SAFETY * error_norm ** (-1 / (order + 1))))
            accepted = True
        else:
            factor = max(MIN_FACTOR,
                         SAFETY * error_norm ** (-1 / (order + 1)))
            accepted = False

        h_abs *= factor
        if accepted:
            break

    self.y_old = y

    self.t = t_new
    self.y = y_new

    self.h_abs = h_abs
    self.f = f_new

    return True, None
def compute_comp_ref(self,tidx,lvidx,lonidx,latidx,other):
    """Compute composite reflectivity (dBZ) from rain and snow mixing ratios.

    All levels are fetched because the composite uses the maximum mixing
    ratio found in each vertical column. ``QSNOW`` is optional; when it
    cannot be fetched, snow is treated as zero everywhere.

    TODO: amend this so variables obtained at the start fetch only the
    correct date, lats and lons.

    Parameters
    ----------
    tidx, lonidx, latidx
        Index selections forwarded to ``self.get``.
    lvidx, other
        Unused; accepted for a uniform call signature.

    Returns
    -------
    dBZ : ndarray
        ``10 * log10`` of the summed rain and snow equivalent reflectivity
        factors, passed through ``nan_to_num``.

    NOTE(review): the first axis of ``QRAIN``/``QSNOW`` is dropped and the
    next one is treated as the vertical axis -- confirm that ``self.get``
    returns (time, level, lat, lon) arrays.
    """
    T2 = self.get('T2',tidx,False,lonidx,latidx)
    # False as the level argument should get all levels
    QR = self.get('QRAIN',tidx,False,lonidx,latidx)
    PSFC = self.get('PSFC',tidx,False,lonidx,latidx)
    try:
        QS = self.get('QSNOW',tidx,False,lonidx,latidx)
    except Exception:
        # Best effort: no snow field available, so assume no snow. A bare
        # ``except`` would also swallow KeyboardInterrupt/SystemExit.
        QS = N.zeros(N.shape(QR))

    rhor = 1000.0
    no_rain = 8.0E6
    no_snow = 2.0E6 * N.exp(-0.12*(T2-273.15))

    density = N.divide(PSFC,(287.0 * T2))

    # Column maximum over the vertical axis; a single reduction replaces
    # the per-point Python loop (and works for any grid size).
    Qra = N.max(QR[0,...], axis=0)
    Qsn = N.max(QS[0,...], axis=0)

    # Calculate slope factor lambda; the tiny offset keeps the
    # denominator away from exactly zero where there is no rain.
    lambr = (N.divide((3.14159 * no_rain * rhor),
                      N.multiply(density, Qra)+N.nextafter(0,1))) ** 0.25
    lambs = N.exp(-0.0536 * (T2 - 273.15))

    # Calculate equivalent reflectivity factor
    Zer = (720.0 * no_rain * (lambr ** -7.0)) * 1E18
    Zes_int = N.divide((lambs * Qsn * density), no_snow)
    Zes = ((0.224 * 720 * 1E18) / (3.14159 * rhor) ** 2) * Zes_int ** 2
    Ze = N.add(Zer, Zes)

    dBZ = N.nan_to_num(10*N.log10(Ze))
    return dBZ
def gen(x, y, name):
    """Generate test data and write to file.

    Computes ``np.nextafter(x, y)`` and stores the inputs together with
    the result as JSON lists under the keys ``x``, ``y`` and ``expected``.

    Parameters
    ----------
    x, y : ndarray
        Start values and the values to step towards.
    name : str
        Path of the output file; it is overwritten if it exists.
    """
    expected = np.nextafter(x, y)
    fixture = {
        label: values.tolist()
        for label, values in (("x", x), ("y", y), ("expected", expected))
    }
    with open(name, "w") as outfile:
        json.dump(fixture, outfile)
def int_type_next_after(x, direction, itemsize):
    """Return the next representable neighbor of x in the appropriate direction.

    For an integer `x` this is ``x - 1`` or ``x + 1``. For a float `x` it
    is the largest integer strictly below `x` (``direction < 0``) or the
    smallest integer strictly above `x` (``direction > 0``).

    Parameters
    ----------
    x : int or float
        Reference value.
    direction : {-1, +1}
        Which neighbor to return.
    itemsize : int
        Unused; accepted for a uniform helper signature.

    Returns
    -------
    int
    """
    # Local import: the file's import block is not visible from here.
    import math

    assert direction in [-1, +1]

    # x is guaranteed to be either an int or a float
    if isinstance(x, int):
        return x - 1 if direction < 0 else x + 1

    # floor/ceil round towards -inf/+inf. Truncating with int() rounds
    # towards zero, which gives the wrong neighbor for negative values
    # (e.g. -2.5 going up must give -2, not -1).
    if direction < 0:
        return math.ceil(x) - 1
    return math.floor(x) + 1
def compute_new_range(self, request=None, use_auto=True):
    """Compute range parameters for every x, y, z and c axis.

    Axes are processed in the order x, y, z, c.  Ranges computed for x and
    y axes are remembered so that later y/z/c range queries on members can
    be restricted to them.

    Parameters
    ----------
    request : iterable of (name, value) pairs, optional
        Per-axis overrides.  When ``name`` matches an axis name, ``value``
        (indexable 0..8) replaces the parameters read from the axis.
    use_auto : bool
        When False, an axis' ``auto`` flag alone does not trigger
        recomputation of its range.

    Returns
    -------
    list of (axis name, parameter list) tuples, where the parameter list
    is ``[base, auto, range, scale, symloglin, symscale] + list(mode)``.
    """
    def en_int(range0):
        # Snap the bounds to a multiple of their leading power of ten:
        # the lower bound is floored, the upper bound is rounded up.
        import numpy as np
        a = range0[0]
        b = range0[1]
        if a != 0:
            si = a/abs(a)
            ex = int(np.log10(abs(a)))
            # if (a/(10.**ex) % 1)== 0.:
            ai = (np.floor(a/(10.**ex)))*10.**ex
            # else:
            # ai = (np.floor(a/(10.**ex))-si)*10.**ex
        else:
            ai = 0.
        if b != 0:
            si = b/abs(b)
            ex = int(np.log10(abs(b)))
            if (b/(10.**ex) % 1) == 0.:
                bi = (np.floor(b/(10.**ex)))*10.**ex
            else:
                bi = (np.floor(b/(10.**ex))+1)*10.**ex
        else:
            bi = 0.
        return ai, bi

    def en_sym(range0):
        # Make the range symmetric about zero using the larger magnitude.
        a = range0[0]
        b = range0[1]
        if abs(a) > abs(b):
            return (-abs(a), abs(a))
        else:
            return (-abs(b), abs(b))

    def en_int_sym(range, mode):
        # Apply the adjustments selected by ``mode``:
        #   mode[2] -> widen both ends by 10% of the span,
        #   mode[0] -> en_int, mode[1] -> en_sym.
        # NOTE(review): nesting of the mode[2] block under the None check
        # was reconstructed from whitespace-collapsed text - confirm.
        if range[0] is not None:
            if np.iscomplex(range[0]):
                # complex bounds: keep real parts only, in ascending order
                range = sorted([float(np.real(range[0])),
                                float(np.real(range[1]))])
            if mode[2]:
                d = abs(float(range[0])-float(range[1]))
                if range[1] > range[0]:
                    range[1] = range[1]+d/10.
                    range[0] = range[0]-d/10.
                else:
                    range[1] = range[1]-d/10.
                    range[0] = range[0]+d/10.
        if (mode[0] and range[0] is not None and range[1] is not None):
            range = en_int(range)
        if (mode[1] and range[0] is not None and range[1] is not None):
            range = en_sym(range)
        return range

    def _value2param(value):
        # Unpack a flat 9-item request value; the last three items form mode.
        return (value[0], value[1], value[2], value[3], value[4], value[5],
                (value[6], value[7], value[8]))

    def _a2param(ax):
        # Read the same seven parameters from an axis object.
        return (ax.base, ax.auto, ax.range, ax.scale, ax.symloglin,
                ax.symloglinscale, ax.mode)

    data = []
    newrange = {}  # axis object -> range decided in this call (x and y only)

    # 0) first do xrange
    for ax in self._xaxis:
        base, auto, range, scale, symloglin, symscale, mode = _a2param(ax)
        if request is not None:
            for name, value in request:
                if name == ax.name:
                    base, auto, range, scale, symloglin, symscale, mode = _value2param(
                        value)
                    break
        # NOTE(review): this branch tests ``range is None`` while the
        # y/z/c branches test ``range[0] is None`` - confirm intended.
        if (auto and use_auto) or range is None:
            range = [None]*2
            for m in ax.walk_member():
                if m.is_suppress():
                    continue
                range = m.get_xrange(range, scale=scale)
            range = en_int_sym(range, mode)
        if (range[0] is None or range[1] is None):
            range = (0, 1)
        if (range[0] == range[1]):
            # degenerate range: open it up by +/- 0.5
            range = (range[0]-0.5, range[0]+0.5)
        p = [base, auto, range,
             scale, symloglin, symscale, ] + list(mode)
        newrange[ax] = range
        data.append((ax.name, p))

    # 1) second do yrange
    for ay in self._yaxis:
        base, auto, range, scale, symloglin, symscale, mode = _a2param(ay)
        if request is not None:
            for name, value in request:
                if name == ay.name:
                    base, auto, range, scale, symloglin, symscale, mode = _value2param(
                        value)
                    break
        if (auto and use_auto) or range[0] is None:
            range = [None]*2
            for m in ay.walk_member():
                if m.is_suppress():
                    continue
                # prefer the x range decided above over the stored one
                ax = m.get_xaxisparam()
                if ax in newrange:
                    xrange = newrange[ax]
                else:
                    xrange = ax.range
                range = m.get_yrange(range, xrange=xrange, scale=scale)
            range = en_int_sym(range, mode)
        if (range[0] is None or range[1] is None):
            range = (0, 1)
        if (range[0] == range[1]):
            range = (range[0]-0.5, range[0]+0.5)
        p = [base, auto, range, scale, symloglin, symscale, ] + list(mode)
        newrange[ay] = range
        data.append((ay.name, p))

    # 2-1) third do zrange
    for az in self._zaxis:
        base, auto, range, scale, symloglin, symscale, mode = _a2param(az)
        if request is not None:
            for name, value in request:
                if name == az.name:
                    base, auto, range, scale, symloglin, symscale, mode = _value2param(
                        value)
                    break
        if (auto and use_auto) or range[0] is None:
            range = [None]*2
            for m in az.walk_member():
                if m.is_suppress():
                    continue
                ax = m.get_xaxisparam()
                ay = m.get_yaxisparam()
                if ax in newrange:
                    xrange = newrange[ax]
                else:
                    xrange = ax.range
                if ay in newrange:
                    yrange = newrange[ay]
                else:
                    yrange = ay.range
                range = m.get_zrange(range, xrange=xrange, yrange=yrange,
                                     scale=scale)
            range = en_int_sym(range, mode)
        if (range[0] is None or range[1] is None):
            range = (0, 1)
        if (range[0] == range[1]):
            range = (range[0]-0.5, range[0]+0.5)
        p = [base, auto, range, scale, symloglin, symscale] + list(mode)
        data.append((az.name, p))

    # 2-2) third do crange
    for ac in self._caxis:
        base, auto, range, scale, symloglin, symscale, mode = _a2param(ac)
        if request is not None:
            for name, value in request:
                if name == ac.name:
                    base, auto, range, scale, symloglin, symscale, mode = _value2param(
                        value)
                    break
        if (auto and use_auto) or range[0] is None:
            range = [None]*2
            for m in ac.walk_member():
                if m.is_suppress():
                    continue
                ax = m.get_xaxisparam()
                ay = m.get_yaxisparam()
                if ax in newrange:
                    xrange = newrange[ax]
                else:
                    xrange = ax.range
                if ay in newrange:
                    yrange = newrange[ay]
                else:
                    yrange = ay.range
                range = m.get_crange(range, xrange=xrange, yrange=yrange,
                                     scale=scale)
            range = en_int_sym(range, mode)
        if (range[0] is None or range[1] is None):
            range = (0, 1)
        if (range[0] == range[1]):
            # Unlike x/y/z, a degenerate colour range is widened relative
            # to its value (or by one float spacing below 1.0 when it is 0).
            if range[0] == 0:
                delta = 1 - np.nextafter(1.0, 0.0)
                range = (-abs(delta), abs(delta))
            else:
                range = (range[0]-abs(range[0])/10, range[0]+abs(range[0])/10)
        p = [base, auto, range, scale, symloglin, symscale, ] + list(mode)
        data.append((ac.name, p))
    return data
bigram_prob[charid[name[i]], charid[name[i+1]]] += 1 trigram_prob[charid[name[i]], charid[name[i+1]], charid[name[i+2]]] += 1 if len(name) == 1: bigram_prob[charid[name[0]], charid['end']] += 1 elif len(name) == 2: bigram_prob[charid[name[0]], charid[name[1]]] += 1 bigram_prob[charid[name[1]], charid['end']] += 1 trigram_prob[charid[name[0]], charid[name[1]], charid['end']] += 1 else: bigram_prob[charid[name[-1]], charid['end']] += 1 trigram_prob[charid[name[-2]], charid[name[-1]], charid['end']] += 1 bigram_prob = bigram_prob / bigram_prob.sum(axis=1, keepdims=True) trigram_prob = trigram_prob / (trigram_prob.sum(axis=2, keepdims=True) + np.nextafter(0, 1)) def create_sample(seed=None): if seed: np.random.seed(seed) variable_name = '' stop_chance = 0 curr = np.random.choice(list(start_char_prob.keys()), p=list(start_char_prob.values())) variable_name += curr curr = np.random.choice(list(charid.keys()), p=bigram_prob[charid[curr], :]) while curr != 'end': variable_name += curr if np.random.uniform(0, 1) < stop_chance:
def _normal(key, shape, dtype):
    """Draw normal samples by applying the inverse error function to uniforms.

    Uniform samples are drawn between the first representable value above
    -1 (so that -1 itself is never used as the lower bound) and 1, then
    mapped through ``sqrt(2) * erf_inv(u)``.
    """
    _check_shape("normal", shape)
    minus_one = onp.array(-1., dtype)
    lower = onp.nextafter(minus_one, 0., dtype=dtype)
    upper = onp.array(1., dtype)
    samples = uniform(key, shape, dtype, lower, upper)
    root_two = onp.array(onp.sqrt(2), dtype)
    return root_two * lax.erf_inv(samples)
from __future__ import (absolute_import, division, print_function) import numpy as np from functools import wraps from lmfit.model import Model MIN_POS_DBL = np.nextafter(0, 1) # minimum positive float def prefix_params(param_expr): r"""Prepend parameter names with prefix in parameter expressions Parameters ---------- param_expr: function bound method of a model returning an expression for a parameter in string format Returns ------- function """ @wraps(param_expr) def wrapper(model_instance): if not isinstance(model_instance, Model): raise TypeError('Function argument is not a Model instance') prefix = model_instance.prefix p_e = param_expr(model_instance) # the parameter expression in str for prefixed_name in model_instance.param_names: name = prefixed_name.replace(prefix, '') # drop the prefix p_e = p_e.replace(name, prefixed_name)
def __init__(self, coord, value=0.0):
    """Store a point-like sample as a minimal-width interval.

    Parameters
    ----------
    coord : float
        Position of the sample.
    value : float, optional
        Value carried at ``coord`` (default 0.0).
    """
    self._coord = coord
    self._value = value
    # It behaves like a very small interval: the end is the next float
    # above the coordinate.  The direction is +inf rather than ``coord + 1``
    # because ``coord + 1 == coord`` once |coord| >= 2**53, which would
    # collapse the interval to zero width.
    self._end = np.nextafter(self._coord, np.inf)
def _compute_mi_cd(c, d, n_neighbors):
    """Estimate mutual information between a continuous and a discrete variable.

    Parameters
    ----------
    c : ndarray, shape (n_samples,)
        Samples of a continuous random variable.

    d : ndarray, shape (n_samples,)
        Samples of a discrete random variable.

    n_neighbors : int
        Number of nearest neighbors to search for each point, see [1]_.

    Returns
    -------
    mi : float
        Estimated mutual information. A negative estimate is replaced by 0.

    Notes
    -----
    True mutual information can't be negative. A negative numerical
    estimate means (provided the method is adequate) that the mutual
    information is close to 0, so clamping it to 0 is a reasonable strategy.

    References
    ----------
    .. [1] B. C. Ross "Mutual Information between Discrete and Continuous
       Data Sets". PLoS ONE 9(2), 2014.
    """
    total = c.shape[0]
    c = c.reshape((-1, 1))

    radius = np.empty(total)
    label_counts = np.empty(total)
    k_all = np.empty(total)
    nn = NearestNeighbors()

    for label in np.unique(d):
        members = d == label
        n_members = np.sum(members)
        label_counts[members] = n_members
        if n_members <= 1:
            # singleton labels are dropped below; no radius needed
            continue
        k = min(n_neighbors, n_members - 1)
        nn.set_params(n_neighbors=k)
        nn.fit(c[members])
        distances = nn.kneighbors()[0]
        # step just inside the k-th neighbour distance
        radius[members] = np.nextafter(distances[:, -1], 0)
        k_all[members] = k

    # Ignore points with unique labels.
    keep = label_counts > 1
    n_samples = np.sum(keep)
    label_counts = label_counts[keep]
    k_all = k_all[keep]
    c = c[keep]
    radius = radius[keep]

    nn.set_params(algorithm='kd_tree')
    nn.fit(c)
    neighbours = nn.radius_neighbors(radius=radius, return_distance=False)
    m_all = np.array([i.size for i in neighbours])

    estimate = (digamma(n_samples) + np.mean(digamma(k_all)) -
                np.mean(digamma(label_counts)) -
                np.mean(digamma(m_all + 1)))
    return max(0, estimate)
def run(walk: Callable, sample: Callable, delta: Callable, log_handler: Callable, theta_0, observed: Sequence,
        simulation_n: int, boundaries: Sequence, r: float, bin_n: int) -> None:
    """ MCMC walk over the posterior using a weighted regression-based likelihood approximator.

    Starting from theta_0, each iteration in (boundaries[0], boundaries[1]):

    1) Proposes a new theta by walking from the current state.
    2) Fills a (simulation_n x len(observed)) distance matrix for the proposal.
    3) Turns that matrix into a likelihood for the proposal.
    4) Accepts the proposal if the current state's likelihood is zero, or if
       p(proposed) / p(current) exceeds a uniform(0, 1) draw. Otherwise the current
       state's waiting time is incremented.
    5) Hands the chain and the iteration number to the log handler.

    :param walk: Function that accepts some parameter set and returns another parameter set.
    :param sample: Function that produces a collection of repeat lengths (i.e. the model function).
    :param delta: Frequency distribution distance function. 0 = exact match, 1 = maximally dissimilar.
    :param log_handler: Function that handles what occurs with the current Markov chain and results.
    :param theta_0: Initial starting point.
    :param observed: 2D list of (int, float) tuples representing the (repeat length, frequency) tuples.
    :param simulation_n: Number of simulations to use to obtain a distance.
    :param boundaries: Starting and ending iteration for this specific MCMC run.
    :param r: Exponential decay rate for weight vector used in regression (a=1).
    :param bin_n: Number of bins used to construct histogram.
    :return: None.
    """
    from numpy import zeros, mean, nextafter, RankWarning
    from types import SimpleNamespace
    from warnings import simplefilter
    from numpy.random import uniform
    from datetime import datetime

    # Rank warnings are expected here; silence them.
    simplefilter('ignore', RankWarning)

    def a_record(a_1, b_1, c_1, d_1, e_1, f_1):
        # One entry of the chain.
        return SimpleNamespace(theta=a_1, time_r=b_1, waiting_time=c_1,
                               p_proposed=d_1, expected_delta=e_1, proposed_time=f_1)

    # The chain starts at the initial guess.
    x = [a_record(theta_0, 0, 1, 0, 0, 0)]

    first, last = boundaries[0] + 1, boundaries[1]
    for i in range(first, last):
        # Walk from the previous state.
        theta_proposed = walk(x[-1].theta)

        # Distance matrix for the proposal.
        d = zeros((simulation_n, len(observed)), dtype='float64')
        populate_d(d, observed, sample, delta, theta_proposed, [theta_proposed.kappa, theta_proposed.omega])

        # Likelihood vector, then the two likelihoods being compared.
        v = _generate_v(d, r, bin_n)
        p_proposed = _likelihood_from_v(v)
        p_k = x[-1].p_proposed

        accepted = abs(0 - p_k) < nextafter(0, 1) or p_proposed / p_k > uniform(0, 1)
        if not accepted:
            # Rejected: stay put and wait one more iteration.
            x[-1].waiting_time += 1
        else:
            x = x + [a_record(theta_proposed, datetime.now(), 1, p_proposed, mean(d), i)]

        # Record the chain; what happens depends on the current iteration.
        log_handler(x, i)
), "logaddexp2": pandas_udf( lambda s1, s2: np.logaddexp2(s1, s2), DoubleType(), PandasUDFType.SCALAR ), "logical_and": lambda c1, c2: c1.cast(BooleanType()) & c2.cast(BooleanType()), "logical_or": lambda c1, c2: c1.cast(BooleanType()) | c2.cast(BooleanType()), "logical_xor": lambda c1, c2: ( # mimics xor by logical operators. (c1.cast(BooleanType()) | c2.cast(BooleanType())) & (~(c1.cast(BooleanType())) | ~(c2.cast(BooleanType()))) ), "maximum": F.greatest, "minimum": F.least, "modf": pandas_udf(lambda s1, s2: np.modf(s1, s2), DoubleType(), PandasUDFType.SCALAR), "nextafter": pandas_udf( lambda s1, s2: np.nextafter(s1, s2), DoubleType(), PandasUDFType.SCALAR ), "right_shift": pandas_udf( lambda s1, s2: np.right_shift(s1, s2), LongType(), PandasUDFType.SCALAR ), } ) # Copied from pandas. # See also https://docs.scipy.org/doc/numpy/reference/arrays.classes.html#standard-array-subclasses def maybe_dispatch_ufunc_to_dunder_op( ser_or_index: "IndexOpsMixin", ufunc: Callable, method: str, *inputs: Any, **kwargs: Any ) -> "IndexOpsMixin": special = { "add",
def __call__(self, X, alpha=None, bytes=False):
    """
    Map data values to RGBA colors through the lookup table.

    *X* is either a scalar or an array (of any dimension).
    If scalar, a tuple of rgba values is returned, otherwise
    an array with the new shape = oldshape+(4,). If the X-values
    are integers, then they are used as indices into the array.
    If they are floating point, then they must be in the
    interval [0.0, 1.0] (1.0 is mapped to the last regular entry;
    values outside the interval map to the under/over entries).
    Alpha must be a scalar between 0 and 1, or None.
    If bytes is False, the rgba values will be floats on a
    0-1 scale; if True, they will be uint8, 0-255.
    """

    if not self._isinit:
        self._init()
    mask_bad = None
    if not cbook.iterable(X):
        vtype = 'scalar'
        xa = np.array([X])
    else:
        vtype = 'array'
        xma = ma.array(X, copy=True)  # Copy here to avoid side effects.
        mask_bad = xma.mask           # Mask will be used below.
        xa = xma.filled()             # Fill to avoid infs, etc.
        del xma

    # Calculations with native byteorder are faster, and avoid a
    # bug that otherwise can occur with putmask when the last
    # argument is a numpy scalar.
    if not xa.dtype.isnative:
        xa = xa.byteswap().newbyteorder()

    if xa.dtype.kind == "f":
        # Treat 1.0 as slightly less than 1.
        # (nextafter(1, 0) evaluated in xa's own float dtype)
        vals = np.array([1, 0], dtype=xa.dtype)
        almost_one = np.nextafter(*vals)
        cbook._putmask(xa, xa == 1.0, almost_one)
        # The following clip is fast, and prevents possible
        # conversion of large positive values to negative integers.

        xa *= self.N
        if NP_CLIP_OUT:
            np.clip(xa, -1, self.N, out=xa)
        else:
            xa = np.clip(xa, -1, self.N)

        # ensure that all 'under' values will still have negative
        # value after casting to int
        cbook._putmask(xa, xa < 0.0, -1)
        xa = xa.astype(int)
    # Set the over-range indices before the under-range;
    # otherwise the under-range values get converted to over-range.
    cbook._putmask(xa, xa > self.N - 1, self._i_over)
    cbook._putmask(xa, xa < 0, self._i_under)
    if mask_bad is not None:
        if mask_bad.shape == xa.shape:
            cbook._putmask(xa, mask_bad, self._i_bad)
        elif mask_bad:
            # scalar mask that is True: every entry is bad
            xa.fill(self._i_bad)
    if bytes:
        lut = (self._lut * 255).astype(np.uint8)
    else:
        lut = self._lut.copy()  # Don't let alpha modify original _lut.

    if alpha is not None:
        alpha = min(alpha, 1.0)  # alpha must be between 0 and 1
        alpha = max(alpha, 0.0)
        if bytes:
            alpha = int(alpha * 255)
        if (lut[-1] == 0).all():
            lut[:-1, -1] = alpha
            # All zeros is taken as a flag for the default bad
            # color, which is no color--fully transparent.  We
            # don't want to override this.
        else:
            lut[:, -1] = alpha
            # If the bad value is set to have a color, then we
            # override its alpha just as for any other value.

    rgba = np.empty(shape=xa.shape + (4, ), dtype=lut.dtype)
    lut.take(xa, axis=0, mode='clip', out=rgba)
    # twice as fast as lut[xa];
    # using the clip or wrap mode and providing an
    # output array speeds it up a little more.
    if vtype == 'scalar':
        rgba = tuple(rgba[0, :])
    return rgba
def compute_mi_cc(x, y, n_neighbors=3):
    """Estimate mutual information between two continuous variables.

    A ball tree over the joint ``(x, y)`` samples gives, for every point,
    the Chebyshev distance to its ``n_neighbors``-th neighbour.  KD-trees
    over ``x`` and over ``y`` then count the points within that radius in
    each marginal, and the counts are combined with digamma terms.

    Parameters
    ----------
    x, y : ndarray
        Samples; each is reshaped to a single column.
    n_neighbors : int, optional
        Number of neighbours used for the radius (default 3).

    Returns
    -------
    mi : float
        The estimate, clamped below at 0.
    """
    leaf_size = 30

    x = x.reshape((-1, 1))
    y = y.reshape((-1, 1))
    xy = np.hstack((x, y))

    n_samples = xy.shape[0]
    n_features = xy.shape[1]

    # create the objects that are going to be needed for NN
    n_levels = 1 + np.log2(max(1, ((n_samples - 1) // leaf_size)))
    # 2**n_levels is an even integer mathematically, but the float result
    # can land just below it; round before truncating (as the KD-tree
    # section below does) so the node arrays are never one short.
    n_nodes = int(round(2**n_levels)) - 1

    # allocate arrays for storage
    idx_array = np.arange(n_samples)
    node_radius = np.zeros(n_nodes, dtype=np.float64)
    node_idx_start = np.zeros(n_nodes, dtype=np.int64)
    node_idx_end = np.zeros(n_nodes, dtype=np.int64)
    node_is_leaf = np.zeros(n_nodes, dtype=np.int64)
    node_centroids = np.zeros((n_nodes, n_features), dtype=np.float64)

    # set metric==1 for chebyshev distance
    ball_tree.recursive_build(0, 0, n_samples, xy, node_centroids,
                              node_radius, idx_array, node_idx_start,
                              node_idx_end, node_is_leaf, n_nodes, leaf_size,
                              metric=1)

    # This algorithm returns the point itself as a neighbor, so
    # if n_neighbors need to be returned then '1' needs to be
    # added in order to get the correct value from 'nth'
    # neighbor when the heap is created
    heap_distances, heap_indices = ball_tree.heap_create(
        n_samples, n_neighbors + 1)
    ball_tree.query(0, xy, heap_distances, heap_indices, xy, idx_array,
                    node_centroids, node_radius, node_is_leaf,
                    node_idx_start, node_idx_end, metric=1)
    ball_tree.heap_sort(heap_distances, heap_indices)

    # step just inside the n-th neighbour distance
    radius = np.nextafter(heap_distances[:, -1], 0)

    # A whole new set of Tree elements need to be created for the KDTree
    # algorithms that are going to be run on both the x and y arrays that
    # were initially passed in.
    #
    # Perform KD-tree NN on x array
    n_samples_kd = x.shape[0]

    # determine number of levels in the tree, and from this
    # the number of nodes in the tree. This results in leaf nodes
    # with numbers of points between leaf_size and 2 * leaf_size
    n_levels_kd = 1 + np.log2(max(1, ((n_samples_kd - 1) // leaf_size)))
    # having to round first and then apply int in order to calculate
    # correct number of nodes
    n_nodes_kd = int(round((2**n_levels_kd))) - 1

    # allocate arrays for storage
    idx_array_kd = np.arange(n_samples_kd)
    node_radius_kd = np.zeros(n_nodes_kd, dtype=np.float64)
    node_idx_start_kd = np.zeros(n_nodes_kd, dtype=np.int64)
    node_idx_end_kd = np.zeros(n_nodes_kd, dtype=np.int64)
    node_is_leaf_kd = np.zeros(n_nodes_kd, dtype=np.int64)
    node_lower_bounds_kd = np.zeros((n_nodes_kd, n_features),
                                    dtype=np.float64)
    node_upper_bounds_kd = np.zeros((n_nodes_kd, n_features),
                                    dtype=np.float64)

    # use 'chebyshev' distance as metric (metric==1)
    kd_tree.recursive_build(0, 0, n_samples_kd, x, node_lower_bounds_kd,
                            node_upper_bounds_kd, node_radius_kd,
                            idx_array_kd, node_idx_start_kd,
                            node_idx_end_kd, node_is_leaf_kd, n_nodes_kd,
                            leaf_size, metric=1)

    count_only = True
    return_distance = False
    counts_x = \
        kd_tree.radius_neighbors_count(x, radius, idx_array_kd,
                                       node_lower_bounds_kd,
                                       node_upper_bounds_kd,
                                       node_radius_kd, node_is_leaf_kd,
                                       node_idx_start_kd, node_idx_end_kd,
                                       count_only, return_distance, metric=1)

    # Perform KD-tree NN on y array
    # Note: The data structures to perform the KD-tree build and search should
    # be the same for x and y. In order to preserve memory, re-using the same
    # objects

    # use 'chebyshev' distance as metric (metric==1)
    kd_tree.recursive_build(0, 0, n_samples_kd, y, node_lower_bounds_kd,
                            node_upper_bounds_kd, node_radius_kd,
                            idx_array_kd, node_idx_start_kd,
                            node_idx_end_kd, node_is_leaf_kd, n_nodes_kd,
                            leaf_size, metric=1)

    counts_y = \
        kd_tree.radius_neighbors_count(y, radius, idx_array_kd,
                                       node_lower_bounds_kd,
                                       node_upper_bounds_kd,
                                       node_radius_kd, node_is_leaf_kd,
                                       node_idx_start_kd, node_idx_end_kd,
                                       count_only, return_distance, metric=1)

    mi = (digamma_cpu(n_samples) + digamma_cpu(n_neighbors) -
          np.mean(digamma_cpu(counts_x)) - np.mean(digamma_cpu(counts_y)))
    mi = max(0, mi)

    return mi
def choropleth(self, geo_data, data=None, columns=None, key_on=None,
               bins=6, fill_color='blue', nan_fill_color='black',
               fill_opacity=0.6, nan_fill_opacity=None,
               line_color='black', line_weight=1, line_opacity=1,
               name=None, legend_name='', topojson=None,
               reset=False, smooth_factor=None, highlight=None,
               **kwargs):
    """
    Apply a GeoJSON overlay to the map.

    Plot a GeoJSON overlay on the base map. There is no requirement
    to bind data (passing just a GeoJSON plots a single-color overlay),
    but there is a data binding option to map your columnar data to
    different feature objects with a color scale.

    If data is passed as a Pandas DataFrame, the "columns" and "key-on"
    keywords must be included, the first to indicate which DataFrame
    columns to use, the second to indicate the layer in the GeoJSON
    on which to key the data. The 'columns' keyword does not need to be
    passed for a Pandas series.

    Colors are generated from color brewer (http://colorbrewer2.org/)
    sequential palettes. By default, linear binning is used between
    the min and the max of the values. Custom binning can be achieved
    with the `bins` parameter.

    TopoJSONs can be passed as "geo_data", but the "topojson" keyword must
    also be passed with the reference to the topojson objects to convert.
    See the topojson.feature method in the TopoJSON API reference:
    https://github.com/topojson/topojson/wiki/API-Reference

    Parameters
    ----------
    geo_data: string/object
        URL, file path, or data (json, dict, geopandas, etc) to your GeoJSON
        geometries
    data: Pandas DataFrame or Series, default None
        Data to bind to the GeoJSON.
    columns: dict or tuple, default None
        If the data is a Pandas DataFrame, the columns of data to be bound.
        Must pass column 1 as the key, and column 2 the values.
    key_on: string, default None
        Variable in the `geo_data` GeoJSON file to bind the data to. Must
        start with 'feature' and be in JavaScript objection notation.
        Ex: 'feature.id' or 'feature.properties.statename'.
    bins: int or sequence of scalars or str, default 6
        If `bins` is an int, it defines the number of equal-width
        bins between the min and the max of the values.
        If `bins` is a sequence, it directly defines the bin edges.
        For more information on this parameter, have a look at
        numpy.histogram function.
    fill_color: string, default 'blue'
        Area fill color. Can pass a hex code, color name, or if you are
        binding data, one of the following color brewer palettes:
        'BuGn', 'BuPu', 'GnBu', 'OrRd', 'PuBu', 'PuBuGn', 'PuRd', 'RdPu',
        'YlGn', 'YlGnBu', 'YlOrBr', and 'YlOrRd'.
    nan_fill_color: string, default 'black'
        Area fill color for nan or missing values.
        Can pass a hex code, color name.
    fill_opacity: float, default 0.6
        Area fill opacity, range 0-1.
    nan_fill_opacity: float, default fill_opacity
        Area fill opacity for nan or missing values, range 0-1.
    line_color: string, default 'black'
        GeoJSON geopath line color.
    line_weight: int, default 1
        GeoJSON geopath line weight.
    line_opacity: float, default 1
        GeoJSON geopath line opacity, range 0-1.
    name: string, default None
        Passed through as the name of the GeoJSON/TopoJSON layer.
    legend_name: string, default empty string
        Title for data legend.
    topojson: string, default None
        If using a TopoJSON, passing "objects.yourfeature" to the topojson
        keyword argument will enable conversion to GeoJSON.
    reset: boolean, default False
        Not used by this method.
    smooth_factor: float, default None
        How much to simplify the polyline on each zoom level. More means
        better performance and smoother look, and less means more accurate
        representation. Leaflet defaults to 1.0.
    highlight: boolean, default False
        Enable highlight functionality when hovering over a GeoJSON area.
    **kwargs:
        Only `threshold_scale` is read: a deprecated alias for `bins`.

    Returns
    -------
    None. The color scale (when data is bound) and the GeoJSON/TopoJSON
    layer are added to the map as children.

    Examples
    --------
    >>> m.choropleth(geo_data='us-states.json', line_color='blue',
    ...              line_weight=3)
    >>> m.choropleth(geo_data='geo.json', data=df,
    ...              columns=['Data 1', 'Data 2'],
    ...              key_on='feature.properties.myvalue',
    ...              fill_color='PuBu',
    ...              bins=[0, 20, 30, 40, 50, 60])
    >>> m.choropleth(geo_data='countries.json',
    ...              topojson='objects.countries')
    >>> m.choropleth(geo_data='geo.json', data=df,
    ...              columns=['Data 1', 'Data 2'],
    ...              key_on='feature.properties.myvalue',
    ...              fill_color='PuBu',
    ...              bins=[0, 20, 30, 40, 50, 60],
    ...              highlight=True)
    """
    if data is not None and not color_brewer(fill_color):
        # The parameter is `fill_color`; the message previously named a
        # non-existent `fill_local`.
        raise ValueError('Please pass a valid color brewer code to '
                         'fill_color. See docstring for valid codes.')

    if nan_fill_opacity is None:
        nan_fill_opacity = fill_opacity

    if 'threshold_scale' in kwargs:
        if kwargs['threshold_scale'] is not None:
            bins = kwargs['threshold_scale']
        warnings.warn(
            'choropleth `threshold_scale` parameter is now deprecated '
            'in favor of the `bins` parameter.', DeprecationWarning)

    # Create color_data dict
    if hasattr(data, 'set_index'):
        # This is a pd.DataFrame
        color_data = data.set_index(columns[0])[columns[1]].to_dict()
    elif hasattr(data, 'to_dict'):
        # This is a pd.Series
        color_data = data.to_dict()
    elif data:
        color_data = dict(data)
    else:
        color_data = None

    if color_data is not None and key_on is not None:
        real_values = np.array(list(color_data.values()))
        real_values = real_values[~np.isnan(real_values)]
        _, bin_edges = np.histogram(real_values, bins=bins)

        bins_min, bins_max = min(bin_edges), max(bin_edges)
        if np.any((real_values < bins_min) | (real_values > bins_max)):
            raise ValueError(
                'All values are expected to fall into one of the provided '
                'bins (or to be Nan). Please check the `bins` parameter '
                'and/or your data.')

        # We add the colorscale
        nb_bins = len(bin_edges) - 1
        color_range = color_brewer(fill_color, n=nb_bins)
        color_scale = StepColormap(
            color_range,
            index=bin_edges,
            vmin=bins_min,
            vmax=bins_max,
            caption=legend_name)
        self.add_child(color_scale)

        # then we 'correct' the last edge for numpy digitize
        # (we add a very small amount to fake an inclusive right interval)
        increasing = bin_edges[0] <= bin_edges[-1]
        bin_edges[-1] = np.nextafter(
            bin_edges[-1],
            (1 if increasing else -1) * np.inf)

        key_on = key_on[8:] if key_on.startswith('feature.') else key_on

        def get_by_key(obj, key):
            # Resolve a dotted key path one segment at a time.
            return (obj.get(key, None) if len(key.split('.')) <= 1 else
                    get_by_key(obj.get(key.split('.')[0], None),
                               '.'.join(key.split('.')[1:])))

        def color_scale_fun(x):
            key_of_x = get_by_key(x, key_on)

            if key_of_x not in color_data:
                return nan_fill_color, nan_fill_opacity

            value_of_x = color_data[key_of_x]
            if np.isnan(value_of_x):
                return nan_fill_color, nan_fill_opacity

            color_idx = np.digitize(value_of_x, bin_edges, right=False) - 1
            return color_range[color_idx], fill_opacity

    else:
        def color_scale_fun(x):
            return fill_color, fill_opacity

    def style_function(x):
        color, opacity = color_scale_fun(x)
        return {
            'weight': line_weight,
            'opacity': line_opacity,
            'color': line_color,
            'fillOpacity': opacity,
            'fillColor': color
        }

    def highlight_function(x):
        return {
            'weight': line_weight + 2,
            'fillOpacity': fill_opacity + .2
        }

    if topojson:
        geo_json = TopoJson(
            geo_data,
            topojson,
            name=name,
            style_function=style_function,
            smooth_factor=smooth_factor)
    else:
        geo_json = GeoJson(
            geo_data,
            name=name,
            style_function=style_function,
            smooth_factor=smooth_factor,
            highlight_function=highlight_function if highlight else None)

    self.add_child(geo_json)
def add_extrema_to_style(style):
    """Add a min and max to each style class in a style dictionary.

    When InaSAFE provides style classes they are specific values, not
    ranges. However QGIS wants to work in ranges, so this helper will
    address that by updating the dictionary to include a min max value
    for each class.

    The first class starts at 0 and each class' quantity is its max.
    For every following class, min is the previous class' quantity plus
    the smallest increment a float can represent (as determined by
    numpy.nextafter()).

    A class whose quantity equals that of the previously kept class is
    dropped, because it would produce a range with min > max.

    :param style: A list of dictionaries of the form as per the example
        below. The kept dictionaries are updated in place.
    :type style: list(dict)

    :returns: A new list holding the kept dictionaries, each with min and
        max entries added.
    :rtype: list(dict)

    Example input::

        style_classes = [dict(colour='#38A800', quantity=2, transparency=0),
                         dict(colour='#38A800', quantity=5, transparency=50),
                         dict(colour='#79C900', quantity=10, transparency=50)]

    Example output::

        style_classes = [dict(colour='#38A800', quantity=2, transparency=0,
                              min=0, max=2),
                         dict(colour='#38A800', quantity=5, transparency=50,
                              min=2.0000000000002, max=5),
                         dict(colour='#79C900', quantity=10, transparency=50,
                              min=5.0000000000002, max=10)]
    """
    new_styles = []
    last_max = 0.0
    previous_quantity = None
    for style_class in style:
        quantity = float(style_class['quantity'])
        if previous_quantity is not None and quantity == previous_quantity:
            # skip it as it does not represent a class increment
            # (comparing against last_max never matched, since last_max is
            # already one float step past the previous quantity)
            continue
        style_class['min'] = last_max
        style_class['max'] = quantity
        previous_quantity = quantity
        last_max = numpy.nextafter(quantity, sys.float_info.max)
        new_styles.append(style_class)
    return new_styles
def _step_impl(self):
    """Attempt one integration step, shrinking the step size until accepted.

    Returns
    -------
    (success, message) : tuple
        ``(False, self.TOO_SMALL_STEP)`` if the step size falls below the
        minimum representable step around ``t``; otherwise
        ``(True, None)`` after the solver state has been advanced.
    """
    t = self.t
    y = self.y
    f = self.f

    max_step = self.max_step
    atol = self.atol
    rtol = self.rtol

    # Ten times the spacing between t and the next float in the
    # integration direction.
    min_step = 10 * np.abs(np.nextafter(t, self.direction * np.inf) - t)
    if self.h_abs > max_step:
        # step was clamped: history from the previous step no longer applies
        h_abs = max_step
        h_abs_old = None
        error_norm_old = None
    elif self.h_abs < min_step:
        h_abs = min_step
        h_abs_old = None
        error_norm_old = None
    else:
        h_abs = self.h_abs
        h_abs_old = self.h_abs_old
        error_norm_old = self.error_norm_old

    J = self.J
    LU_real = self.LU_real
    LU_complex = self.LU_complex

    current_jac = self.current_jac
    jac = self.jac

    rejected = False
    step_accepted = False
    message = None
    while not step_accepted:
        if h_abs < min_step:
            return False, self.TOO_SMALL_STEP

        h = h_abs * self.direction
        t_new = t + h

        # do not step past the integration bound
        if self.direction * (t_new - self.t_bound) > 0:
            t_new = self.t_bound

        h = t_new - t
        h_abs = np.abs(h)

        # initial guess for the stage increments: zeros on the first step,
        # otherwise taken from the previous dense output
        if self.sol is None:
            Z0 = np.zeros((3, y.shape[0]))
        else:
            Z0 = self.sol(t + h * C).T - y

        scale = atol + np.abs(y) * rtol

        converged = False
        while not converged:
            # factorizations are rebuilt whenever they were invalidated
            if LU_real is None or LU_complex is None:
                LU_real = self.lu(MU_REAL / h * self.I - J)
                LU_complex = self.lu(MU_COMPLEX / h * self.I - J)

            converged, n_iter, Z, rate = solve_collocation_system(
                self.fun, t, y, h, Z0, scale, self.newton_tol,
                LU_real, LU_complex, self.solve_lu)

            if not converged:
                # Jacobian already current: retrying would not help
                if current_jac:
                    break

                J = self.jac(t, y, f)
                current_jac = True
                LU_real = None
                LU_complex = None

        if not converged:
            # halve the step and try again
            h_abs *= 0.5
            LU_real = None
            LU_complex = None
            continue

        y_new = y + Z[-1]
        ZE = Z.T.dot(E) / h
        error = self.solve_lu(LU_real, f + ZE)
        scale = atol + np.maximum(np.abs(y), np.abs(y_new)) * rtol
        error_norm = norm(error / scale)
        safety = 0.9 * (2 * NEWTON_MAXITER + 1) / (2 * NEWTON_MAXITER
                                                   + n_iter)

        # after a rejection, re-evaluate the error estimate once more
        if rejected and error_norm > 1:
            error = self.solve_lu(LU_real, self.fun(t, y + error) + ZE)
            error_norm = norm(error / scale)

        if error_norm > 1:
            factor = predict_factor(h_abs, h_abs_old,
                                    error_norm, error_norm_old)
            h_abs *= max(MIN_FACTOR, safety * factor)

            LU_real = None
            LU_complex = None
            rejected = True
        else:
            step_accepted = True

    recompute_jac = jac is not None and n_iter > 2 and rate > 1e-3

    factor = predict_factor(h_abs, h_abs_old, error_norm, error_norm_old)
    factor = min(MAX_FACTOR, safety * factor)

    # keep the step size (and the factorizations) when the predicted
    # change is small and the Jacobian is not being recomputed
    if not recompute_jac and factor < 1.2:
        factor = 1
    else:
        LU_real = None
        LU_complex = None

    f_new = self.fun(t_new, y_new)
    if recompute_jac:
        J = jac(t_new, y_new, f_new)
        current_jac = True
    elif jac is not None:
        current_jac = False

    self.h_abs_old = self.h_abs
    self.error_norm_old = error_norm

    self.h_abs = h_abs * factor

    self.y_old = y

    self.t = t_new
    self.y = y_new
    self.f = f_new

    self.Z = Z

    self.LU_real = LU_real
    self.LU_complex = LU_complex
    self.current_jac = current_jac
    self.J = J

    self.t_old = t
    self.sol = self._compute_dense_output()

    return step_accepted, message
def sinc_pattern(x):
    """Return the normalized sinc pattern ``sin(pi * x) / (pi * x)``.

    The value at ``x == 0`` is 1. The input is not modified, and scalars
    and integer arrays are accepted (the previous in-place zero
    replacement mutated the caller's array, failed on scalars and produced
    NaN for integer arrays).
    """
    return np.sinc(x)
def wavelet_transform(X, rate, filters='rat', hg_only=True, X_fft_h=None, npad=1000):
    """Apply a wavelet transform using a prespecified set of filters.

    Calculates the center frequencies and bandwidths for the wavelets and
    applies them along with a heavyside function to the fft of the signal
    before performing an inverse fft.

    Parameters
    ----------
    X : ndarray (n_time, n_channels)
        Input data, dimensions
    rate : float
        Number of samples per second.
    filters : str (optional)
        Which type of filters to use. Options are
        'rat': center frequencies spanning 2-1200 Hz, constant Q, 54 bands
        'human': center frequencies spanning 4-200 Hz, constant Q, 40 bands
        'changlab': center frequencies spanning 4-200 Hz, variable Q, 40 bands
        Note - calculating center frequencies above rate/2 raises a ValueError
    hg_only : bool
        If True, only the amplitudes in the high gamma range [70-150 Hz] is
        computed.
    X_fft_h : ndarray (n_time, n_channels)
        Precomputed product of X_fft and heavyside. Useful for when bands are
        computed independently. When given, `X` and `npad` are only used to
        work out how much padding to trim from the result, so they should be
        the same values that produced `X_fft_h`.
    npad : int
        Padding to add to beginning and end of timeseries. Default 1000.

    Returns
    -------
    Xh : ndarray, complex
        Bandpassed analytic signal
    X_fft_h : ndarray, complex
        Product of X_fft and heavyside.
    cfs : ndarray
        Center frequencies used.
    sds : ndarray
        Bandwidths used.

    Raises
    ------
    NotImplementedError
        If `filters` is not one of the supported names.
    ValueError
        If the highest center frequency exceeds the Nyquist rate.
    """
    # The padding bookkeeping is derived on every call so that the final trim
    # also works with a precomputed X_fft_h (previously `to_removes` was
    # undefined on that path).
    # NOTE(review): assumes _npads has no side effects - confirm.
    npads, to_removes, _ = _npads(X, npad)
    if X_fft_h is None:
        X = _smart_pad(X, npads)
        n_time = X.shape[0]
    else:
        n_time = X_fft_h.shape[0]
    freq = fftfreq(n_time, 1. / rate)

    # Calculate center frequencies
    if filters in ['human', 'changlab']:
        cfs = log_spaced_cfs(4.0749286538265, 200, 40)
    elif filters == 'rat':
        cfs = log_spaced_cfs(2.6308, 1200., 54)
    else:
        raise NotImplementedError

    # Subselect high gamma bands
    if hg_only:
        idxs = np.logical_and(cfs >= 70., cfs <= 150.)
        cfs = cfs[idxs]

    # Raise exception if sample rate too small
    if cfs.max() * 2. > np.nextafter(rate, np.inf):  # Allow floating point tolerance
        string = ('Unable to compute wavelet transform above Nyquist rate ({} Hz).' +
                  ' Increase your rate ({} Hz) to at least twice your desired maximum' +
                  ' frequency of interest.')
        raise ValueError(string.format(rate / 2., rate))

    # Calculate bandwidths
    if filters in ['rat', 'human']:
        sds = const_Q_sds(cfs)
    elif filters == 'changlab':
        sds = chang_sds(cfs)
    else:
        raise NotImplementedError

    # One frequency-domain kernel per band (kept separate from the `filters`
    # argument, which names the filter bank).
    kernels = [gaussian(n_time, rate, cf, sd) for cf, sd in zip(cfs, sds)]

    if X_fft_h is None:
        # Heavyside filter with 0 DC
        h = np.zeros(len(freq))
        h[freq > 0] = 2.
        h = h[:, np.newaxis]
        X_fft_h = fft(X, axis=0) * h

    # `np.complex` was removed from NumPy; the builtin `complex` is equivalent.
    Xh = np.zeros(X_fft_h.shape + (len(kernels),), dtype=complex)
    for ii, f in enumerate(kernels):
        if f is None:
            Xh[..., ii] = ifft(X_fft_h, axis=0)
        else:
            f = f / np.linalg.norm(f)
            Xh[..., ii] = ifft(X_fft_h * f[:, np.newaxis], axis=0)

    Xh = _trim(Xh, to_removes)

    return Xh, X_fft_h, cfs, sds
import bayesmark.constants as cc
import bayesmark.expected_max as em
import bayesmark.quantiles as qt
from bayesmark.cmd_parse import CmdArgs, general_parser, parse_args
from bayesmark.constants import ARG_DELIM, ITER, METHOD, PERF_BEST, PERF_CLIP, PERF_MEAN, PERF_MED, SUGGEST, TEST_CASE
from bayesmark.experiment_aggregate import validate_agg_perf
from bayesmark.serialize import XRSerializer
from bayesmark.util import str_join_safe
from bayesmark.xr_util import ds_concat, ds_like_mixed

# Mathematical settings
# We could move these to constants to eliminate repetition but we will probably phase out anyway
EVAL_Q = 0.5  # Evaluate based on median loss across n_trials
ALPHA = 0.05  # ==> 95% CIs
MIN_POS = np.nextafter(0, 1)  # Smallest positive float
PAD_FACTOR = 10000

logger = logging.getLogger(__name__)


def validate(baseline_ds):
    """Run sanity checks on a baseline dataset, one test case at a time.

    For every entry of ``baseline_ds.coords[TEST_CASE]`` the median and mean
    baseline performance must be non-increasing (``np.diff(...) <= 0``).

    Parameters
    ----------
    baseline_ds
        Object indexable by ``PERF_MED``, ``PERF_MEAN``, ``PERF_BEST`` and
        ``PERF_CLIP`` with a ``TEST_CASE`` coordinate; presumably an
        ``xarray.Dataset`` - confirm against the caller.

    Raises
    ------
    AssertionError
        If the median or mean baseline increases at any point.
    """
    for func_name in baseline_ds.coords[TEST_CASE].values:
        rand_perf_med = baseline_ds[PERF_MED].sel({TEST_CASE: func_name}, drop=True).values
        rand_perf_mean = baseline_ds[PERF_MEAN].sel({TEST_CASE: func_name}, drop=True).values
        # NOTE(review): best_opt and base_clip_val are extracted but not used
        # in the code visible here - confirm whether further checks are missing.
        best_opt = baseline_ds[PERF_BEST].sel({TEST_CASE: func_name}, drop=True).values
        base_clip_val = baseline_ds[PERF_CLIP].sel({TEST_CASE: func_name}, drop=True).values

        assert np.all(np.diff(rand_perf_med) <= 0), "Baseline should be decreasing with iteration"
        assert np.all(np.diff(rand_perf_mean) <= 0), "Baseline should be decreasing with iteration"
def _define_probability_bins(self, n_probability_bins, single_value_lower_limit, single_value_upper_limit):
    """
    Build the probability bins used by a reliability table.

    The interval 0 to 1 is split into equally sized bins. When
    single_value_lower_limit and / or single_value_upper_limit are set,
    an extra narrow bin of width self.single_value_tolerance is added at
    0 and / or 1, and the neighbouring bin is shortened accordingly.

    Args:
        n_probability_bins (int):
            Total number of bins requested, including the extrema bins
            when single value limits are used (in which case at least
            3 bins are needed if both limits are requested).
        single_value_lower_limit (bool):
            Add a lowest bin spanning 0 to self.single_value_tolerance.
        single_value_upper_limit (bool):
            Add a highest bin spanning
            (1 - self.single_value_tolerance) to 1.

    Returns:
        numpy.ndarray:
            A float32 array of [lower, upper] pairs, one row per bin.
            Adjacent bins do not overlap; the upper bound of a bin is the
            float32 value just below the lower bound of the next bin.

    Raises:
        ValueError: If both single value limits are requested with 2 or
                    fewer probability bins.
    """
    n_extrema_bins = bool(single_value_lower_limit) + bool(single_value_upper_limit)
    if n_extrema_bins == 2 and n_probability_bins <= 2:
        msg = ("Cannot use both single_value_lower_limit and "
               "single_value_upper_limit with 2 or fewer "
               "probability bins.")
        raise ValueError(msg)

    # The extrema bins count towards the requested total.
    n_interior_bins = n_probability_bins - n_extrema_bins

    edges = np.linspace(0, 1, n_interior_bins + 1, dtype=np.float32)
    # Upper bounds sit one float32 step below the next lower bound, except
    # for the final bin which closes exactly at 1.
    below_edges = np.nextafter(edges, 0, dtype=np.float32)
    below_edges[-1] = 1.0
    bins = np.column_stack((edges[:-1], below_edges[1:])).astype(np.float32)

    if single_value_lower_limit:
        tolerance = self.single_value_tolerance
        bins[0, 0] = np.nextafter(tolerance, 1, dtype=np.float32)
        zero_bin = np.array([0, tolerance], dtype=np.float32)
        bins = np.vstack([zero_bin, bins]).astype(np.float32)

    if single_value_upper_limit:
        one_bin_start = 1.0 - self.single_value_tolerance
        bins[-1, 1] = np.nextafter(one_bin_start, 0, dtype=np.float32)
        one_bin = np.array([one_bin_start, 1], dtype=np.float32)
        bins = np.vstack([bins, one_bin]).astype(np.float32)

    return bins
from aesara.compile.builders import OpFromGraph from aesara.graph.basic import Apply from aesara.graph.op import Op from aesara.scalar import UnaryScalarOp, upgrade_to_float_no_complex from aesara.scan import until from aesara.tensor.elemwise import Elemwise from aesara.tensor.slinalg import Cholesky, Solve from pymc3.aesaraf import floatX from pymc3.distributions.shape_utils import to_tuple from pymc3.distributions.special import gammaln f = floatX c = -0.5 * np.log(2.0 * np.pi) _beta_clip_values = { dtype: (np.nextafter(0, 1, dtype=dtype), np.nextafter(1, 0, dtype=dtype)) for dtype in ["float16", "float32", "float64"] } def bound(logp, *conditions, **kwargs): """ Bounds a log probability density with several conditions. When conditions are not met, the logp values are replaced by -inf. Note that bound should not be used to enforce the logic of the logp under the normal support as it can be disabled by the user via check_bounds = False in pm.Model() Parameters ---------- logp: float
def _step_impl(self):
    """Attempt one variable-order step and commit it to the solver state.

    The step size is reduced until the Newton iteration performed by
    ``solve_bdf_system`` converges and the scaled error norm is at most 1.
    After ``order + 1`` consecutive steps of equal size, the order and the
    step size are re-selected from the error estimates of the neighbouring
    orders.

    Returns
    -------
    success : bool
        ``True`` when a step was accepted, ``False`` when the step size
        became smaller than ``min_step``.
    message : object or None
        ``self.TOO_SMALL_STEP`` on failure, ``None`` otherwise.

    Notes
    -----
    ``change_D``, ``solve_bdf_system``, ``norm``, ``NEWTON_MAXITER``,
    ``MIN_FACTOR``, ``MAX_FACTOR`` and ``MAX_ORDER`` are not defined in
    this block; they are expected to be provided by the enclosing module.
    ``D`` is modified in place throughout.
    """
    t = self.t
    D = self.D

    max_step = self.max_step
    # Ten times the gap between t and the next representable float in the
    # direction of integration.
    min_step = 10 * np.abs(np.nextafter(t, self.direction * np.inf) - t)
    if self.h_abs > max_step:
        # Clamping changes the step size, so D is passed to change_D with
        # the step ratio and the equal-step counter restarts.
        h_abs = max_step
        change_D(D, self.order, max_step / self.h_abs)
        self.n_equal_steps = 0
    elif self.h_abs < min_step:
        h_abs = min_step
        change_D(D, self.order, min_step / self.h_abs)
        self.n_equal_steps = 0
    else:
        h_abs = self.h_abs

    atol = self.atol
    rtol = self.rtol
    order = self.order

    alpha = self.alpha
    gamma = self.gamma
    error_const = self.error_const

    J = self.J
    LU = self.LU
    # Without a Jacobian callable there is nothing to refresh, so the
    # current Jacobian is treated as up to date.
    current_jac = self.jac is None

    step_accepted = False
    while not step_accepted:
        if h_abs < min_step:
            return False, self.TOO_SMALL_STEP

        h = h_abs * self.direction
        t_new = t + h

        # Never step past the integration bound; shortening the step also
        # invalidates the LU factorization, which depends on the step.
        if self.direction * (t_new - self.t_bound) > 0:
            t_new = self.t_bound
            change_D(D, order, np.abs(t_new - t) / h_abs)
            self.n_equal_steps = 0
            LU = None

        h = t_new - t
        h_abs = np.abs(h)

        # Predictor: sum of the first order + 1 rows of D.
        y_predict = np.sum(D[:order + 1], axis=0)

        scale = atol + rtol * np.abs(y_predict)
        psi = np.dot(D[1:order + 1].T, gamma[1:order + 1]) / alpha[order]

        converged = False
        c = h / alpha[order]
        while not converged:
            if LU is None:
                LU = self.lu(self.I - c * J)

            converged, n_iter, y_new, d = solve_bdf_system(
                self.fun, t_new, y_predict, c, psi, LU, self.solve_lu,
                scale, self.newton_tol)

            if not converged:
                # With an up-to-date Jacobian, give up and shrink the step.
                if current_jac:
                    break
                J = self.jac(t_new, y_predict)
                LU = None
                current_jac = True

        if not converged:
            # Halve the step and retry with a fresh factorization.
            factor = 0.5
            h_abs *= factor
            change_D(D, order, factor)
            self.n_equal_steps = 0
            LU = None
            continue

        # The safety factor decreases as more Newton iterations were needed.
        safety = 0.9 * (2 * NEWTON_MAXITER + 1) / (2 * NEWTON_MAXITER + n_iter)

        scale = atol + rtol * np.abs(y_new)
        error = error_const[order] * d
        error_norm = norm(error / scale)

        if error_norm > 1:
            factor = max(MIN_FACTOR, safety * error_norm**(-1 / (order + 1)))
            h_abs *= factor
            change_D(D, order, factor)
            self.n_equal_steps = 0
            # As we didn't have problems with convergence, we don't
            # reset LU here.
        else:
            step_accepted = True

    self.n_equal_steps += 1

    self.t = t_new
    self.y = y_new

    self.h_abs = h_abs
    self.J = J
    self.LU = LU

    # Update differences. The principal relation here is
    # D^{j + 1} y_n = D^{j} y_n - D^{j} y_{n - 1}. Keep in mind that D
    # contained difference for previous interpolating polynomial and
    # d = D^{k + 1} y_n. Thus this elegant code follows.
    D[order + 2] = d - D[order + 1]
    D[order + 1] = d
    for i in reversed(range(order + 1)):
        D[i] += D[i + 1]

    # Order and step size are only reconsidered after order + 1 steps of
    # equal size.
    if self.n_equal_steps < order + 1:
        return True, None

    # Error estimates for order - 1 and order + 1; infinity rules out an
    # order that is not available.
    if order > 1:
        error_m = error_const[order - 1] * D[order]
        error_m_norm = norm(error_m / scale)
    else:
        error_m_norm = np.inf

    if order < MAX_ORDER:
        error_p = error_const[order + 1] * D[order + 2]
        error_p_norm = norm(error_p / scale)
    else:
        error_p_norm = np.inf

    error_norms = np.array([error_m_norm, error_norm, error_p_norm])
    factors = error_norms**(-1 / np.arange(order, order + 3))

    # Pick the order (-1, 0 or +1 relative to the current one) with the
    # largest step factor.
    delta_order = np.argmax(factors) - 1
    order += delta_order
    self.order = order

    factor = min(MAX_FACTOR, safety * np.max(factors))
    self.h_abs *= factor
    change_D(D, order, factor)
    self.n_equal_steps = 0
    self.LU = None

    return True, None
def test_half_fpe(self):
    """Check which float16 operations raise floating point errors.

    With every floating point error set to raise, the listed operations
    must raise the named error category and the final statements must
    complete silently.
    """
    def _mul(a, b):
        return a * b

    def _div(a, b):
        return a / b

    def _add(a, b):
        return a + b

    def _sub(a, b):
        return a - b

    with np.errstate(all='raise'):
        small_arr = np.array((1e-4,), dtype=float16)
        big_arr = np.array((1e4,), dtype=float16)
        small = float16(1e-4)
        big = float16(1e4)

        # Each entry is (callable, *arguments).
        underflow_cases = [
            (_mul, small_arr, small_arr),
            (_mul, small_arr, small),
            (_mul, small, small_arr),
            (_mul, small, small),
            (_div, small_arr, big_arr),
            (_div, small_arr, big),
            (_div, small, big_arr),
            (_div, small, big),
            (_div, float16(2.**-14), float16(2**11)),
            (_div, float16(-2.**-14), float16(2**11)),
            (_div, float16(2.**-14+2**-24), float16(2)),
            (_div, float16(-2.**-14-2**-24), float16(2)),
            (_div, float16(2.**-14+2**-23), float16(4)),
        ]
        for op, *operands in underflow_cases:
            assert_raises_fpe('underflow', op, *operands)

        overflow_cases = [
            (_mul, big_arr, big_arr),
            (_mul, big_arr, big),
            (_mul, big, big_arr),
            (_mul, big, big),
            (_div, big_arr, small_arr),
            (_div, big_arr, small),
            (_div, big, small_arr),
            (_div, big, small),
            (_add, float16(65504), float16(17)),
            (_sub, float16(-65504), float16(17)),
            (np.nextafter, float16(65504), float16(np.inf)),
            (np.nextafter, float16(-65504), float16(-np.inf)),
            (np.spacing, float16(65504)),
        ]
        for op, *operands in overflow_cases:
            assert_raises_fpe('overflow', op, *operands)

        invalid_cases = [
            (np.divide, float16(np.inf), float16(np.inf)),
            (np.spacing, float16(np.inf)),
            (np.spacing, float16(np.nan)),
            (np.nextafter, float16(np.inf), float16(0)),
            (np.nextafter, float16(-np.inf), float16(0)),
            (np.nextafter, float16(0), float16(np.nan)),
        ]
        for op, *operands in invalid_cases:
            assert_raises_fpe('invalid', op, *operands)

        # These should not raise
        float16(65472)+float16(32)
        float16(2**-13)/float16(2)
        float16(2**-14)/float16(2**10)
        np.spacing(float16(-65504))
        np.nextafter(float16(65504), float16(-np.inf))
        np.nextafter(float16(-65504), float16(np.inf))
        float16(2**-14)/float16(2**10)
        float16(-2**-14)/float16(2**10)
        float16(2**-14+2**-23)/float16(2)
        float16(-2**-14-2**-23)/float16(2)
def main(dirpath, skip_old=False, num_specs=1):
    """Collect identification results per dataset and compute q-values.

    For every dataset in the module-level ``datasets`` mapping the result
    CSV(s) are merged and read, identifications are stored per dataset and
    merged into an ``'all'`` entry, and peptide and protein q-values are
    calculated with ``calculate_q_value_by_group``. The result dictionaries
    are pickled to ``datasets_result.pkl`` and ``fdr_result.pkl`` inside
    ``dirpath``.

    Args:
        dirpath (str): Directory holding the pickles and the original /
            intermediate result files.
        skip_old (bool): If True and ``datasets_result.pkl`` exists, load
            the previous results and only read datasets not yet contained.
        num_specs (int): Minimum number of distinct spectra required for
            the ``*_num_spec*`` / ``*_psm`` confidence levels.

    Relies on names defined elsewhere in this module: ``datasets``,
    ``ms_filename2sample``, ``SEQ_Q_VALUE_THRESHOLD``,
    ``PROT_Q_VALUE_THRESHOLD`` and ``calculate_q_value_by_group``.

    NOTE(review): this function was reconstructed from flattened source;
    the nesting of a few ``if`` blocks is marked below and should be
    confirmed against the original file.
    """

    def _new_result_entry(num_spectra, instrument, lab):
        # Result skeleton shared by the 'all' entry and every dataset entry.
        return {
            'num_spectra': num_spectra,
            'instrument': instrument,
            'lab': lab,
            # protein_groups, proteins and peptides are dicts that contain
            # sets for each level of confidence
            'protein_groups': {
                'all': set(),
                'safe_psm': set(),
                'safe_seq': set(),
                'safe_seq_num_spec': set(),
                'safe_seq_num_spec_0005': set()
            },
            'proteins': {
                'all': set(),
                'safe_psm': set(),
                'safe_seq': set(),
                'safe_seq_num_spec': set(),
                'safe_seq_num_spec_0005': set()
            },
            'peptides': {
                'all': set(),
                'safe': set(),
                'safe_num_specs': set()
            },
            'spectra': {
                'all': set()
            },
            # protein_dict in contrast is a nested dict with
            # protein/protein_group --> peptide sequence --> spectral information
            # (containing lists of 'spec_title', 'bayes_pep', 'modifications',
            # 'charge', 'psm_q_value', and the tuple 'start_stop')
            'protein_dict': {},
            'original_results': {
                'peptides': {
                    'all': set(),
                    'safe': set()
                },
                'spectra': {
                    'all': set()
                },
            },
            '3engines_results': {
                'peptides': {
                    'all': set(),
                    'safe': set()
                },
                'spectra': {
                    'all': set()
                },
            },
            'combined_results': {
                'peptides': {
                    'all': set(),
                    'safe': set()
                },
                'spectra': {
                    'all': set()
                },
            },
        }

    # Keys stored next to the peptide sequences inside protein_dict[prot].
    # They must be skipped whenever the sequences of a protein are iterated;
    # results reloaded from a pickle already contain all of them.
    non_seq_keys = ('datasets', 'prot_q_value_seq', 'prot_q_value_psm', 'samples')

    uc = ursgal.UController()
    uc.params.update({
        'bigger_scores_better': False,
        'num_compared_psms': 10,
        'accept_conflicting_psms': False,
        'threshold_is_log10': True,
        'score_diff_threshold': 1,
        'psm_defining_colnames': [
            'Spectrum Title',
            'Sequence',
        ],
    })

    pkl_name = os.path.join(dirpath, 'datasets_result.pkl')
    fdr_pkl_name = os.path.join(dirpath, 'fdr_result.pkl')

    old_exists = False
    if os.path.exists(pkl_name) and skip_old is True:
        # load results from previous analysis
        # will only add datasets that are not part of it already
        # NOTE: pickle must only be used on files written by this script.
        print('>>>>>>>> loading pkl <<<<<<<<<<<')
        with open(pkl_name, 'rb') as pkl_file:
            results_dict = pickle.load(pkl_file)
        with open(fdr_pkl_name, 'rb') as fdr_pkl_file:
            fdr_dict = pickle.load(fdr_pkl_file)
        old_exists = True
    else:
        # collect proteins and peptides from result csv,
        # store in dict with all important data
        results_dict = {'all': _new_result_entry(0, set(), set())}
        fdr_dict = {
            'peptides_seq_level': {},
            'peptides_psm_level': {},
            'peptides_seq_level_2specs': {},
            'proteins_seq_level': {},
            'proteins_psm_level': {},
            'proteins_seq_level_2specs': {},
        }

    result_file_list = []
    org_peptide_dict = {}
    for PRIDE_ID in datasets.keys():
        if skip_old is True and old_exists is True and PRIDE_ID in results_dict:
            continue
        print('reading:', PRIDE_ID)
        instrument = datasets[PRIDE_ID]['instrument']
        results_dict['all']['instrument'].add(instrument)
        lab = datasets[PRIDE_ID]['lab']
        results_dict['all']['lab'].add(lab)
        results_dict['all']['num_spectra'] += datasets[PRIDE_ID]['num_spectra']
        if PRIDE_ID not in results_dict.keys():
            results_dict[PRIDE_ID] = _new_result_entry(
                datasets[PRIDE_ID]['num_spectra'], instrument, lab)

        results2be_merged = []
        if type(datasets[PRIDE_ID]['result_file']) == list:
            print('list', PRIDE_ID)
            for result_file in datasets[PRIDE_ID]['result_file']:
                results2be_merged.append(os.path.join(PRIDE_ID, result_file))
        elif datasets[PRIDE_ID]['result_file'] is not None:
            print('not_list', PRIDE_ID)
            results2be_merged.append(
                os.path.join(PRIDE_ID, datasets[PRIDE_ID]['result_file']))
        else:
            print('Could not find result file(s) for dataset:', PRIDE_ID)
            sys.exit(1)

        # merge if multiple files
        merged_file = uc.execute_misc_engine(
            input_file=results2be_merged,
            engine='merge_csvs',
            merge_duplicates=False,
        )

        # collect proteins, peptides and corresponding spectrum_titles
        result_file_list.append(merged_file)
        with open(merged_file, 'r') as in_file:
            result_csv = csv.DictReader(in_file)
            for line_dict in result_csv:
                seq = line_dict['Sequence']
                mod = line_dict['Modifications']
                charge = line_dict['Charge']
                seq_length = len(seq)
                spec_title = line_dict['Spectrum Title']
                is_decoy = line_dict['Is decoy']
                prot = line_dict['Protein ID']
                start = line_dict['Sequence Start']
                stop = line_dict['Sequence Stop']
                psm_q_value = float(line_dict['combined PEP'])
                bayes_pep = float(line_dict['Bayes PEP'])
                if psm_q_value <= 0.01:
                    # keep the best (lowest) PSM q-value per sequence
                    if seq_length not in fdr_dict['peptides_psm_level'].keys():
                        fdr_dict['peptides_psm_level'][seq_length] = {}
                    if seq not in fdr_dict['peptides_psm_level'][
                            seq_length].keys():
                        fdr_dict['peptides_psm_level'][seq_length][seq] = (
                            psm_q_value, is_decoy)
                    elif psm_q_value < fdr_dict['peptides_psm_level'][
                            seq_length][seq][0]:
                        fdr_dict['peptides_psm_level'][seq_length][seq] = (
                            psm_q_value, is_decoy)
                else:
                    print(
                        'Results should be filtered by combined PEP <= 1% (but should contain targets and decoys)'
                    )
                    sys.exit(1)

                # differentiate between protein groups and proteins
                # and remove contaminants
                if len(prot.split('<|>')) > 1:
                    # a group counts as contaminant only if none of its
                    # members carries an 'HVO' identifier
                    contaminants = True
                    for p in prot.split('<|>'):
                        prot_id = p.split(' ')[0]
                        if 'HVO' not in prot_id:
                            continue
                        else:
                            contaminants = False
                    if contaminants is False and is_decoy == 'false':
                        results_dict[PRIDE_ID]['protein_groups']['all'].add(
                            line_dict['Protein ID'])
                        results_dict[PRIDE_ID]['peptides']['all'].add(seq)
                        results_dict[PRIDE_ID]['spectra']['all'].add(
                            spec_title)
                else:
                    contaminants = False
                    prot_id = prot.split(' ')[0]
                    if 'HVO' not in prot_id:
                        contaminants = True
                    if contaminants is False and is_decoy == 'false':
                        results_dict[PRIDE_ID]['proteins']['all'].add(
                            line_dict['Protein ID'])
                        results_dict[PRIDE_ID]['peptides']['all'].add(seq)
                        results_dict[PRIDE_ID]['spectra']['all'].add(
                            spec_title)

                # add info to protein_dict
                if prot not in results_dict[PRIDE_ID]['protein_dict'].keys():
                    results_dict[PRIDE_ID]['protein_dict'][prot] = {}
                if seq not in results_dict[PRIDE_ID]['protein_dict'][
                        prot].keys():
                    results_dict[PRIDE_ID]['protein_dict'][prot][seq] = {
                        'spec_title': [],
                        'bayes_pep': [],
                        'modifications': [],
                        'charge': [],
                        'psm_q_value': [],
                        'start_stop': (start, stop),
                    }
                results_dict[PRIDE_ID]['protein_dict'][prot][seq][
                    'spec_title'].append(spec_title)
                results_dict[PRIDE_ID]['protein_dict'][prot][seq][
                    'bayes_pep'].append(bayes_pep)
                results_dict[PRIDE_ID]['protein_dict'][prot][seq][
                    'psm_q_value'].append(psm_q_value)
                results_dict[PRIDE_ID]['protein_dict'][prot][seq][
                    'modifications'].append(mod)
                results_dict[PRIDE_ID]['protein_dict'][prot][seq][
                    'charge'].append(charge)

        # read results from original (and intermediate) result files
        for file_type in ['original_file', '3engines_file', 'combined_file']:
            results_type = '{0}_results'.format(file_type.split('_')[0])
            if datasets[PRIDE_ID][file_type] is not None:
                filepath_org = os.path.join(
                    dirpath,
                    PRIDE_ID,
                    datasets[PRIDE_ID][file_type],
                )
                with open(filepath_org, 'r') as in_file:
                    result_csv = csv.DictReader(in_file)
                    for line_dict in result_csv:
                        seq = line_dict['Sequence']
                        spec_title = line_dict['Spectrum Title']
                        results_dict[PRIDE_ID][results_type]['peptides'][
                            'all'].add(seq)
                        results_dict[PRIDE_ID][results_type]['spectra'][
                            'all'].add(spec_title)
                        if seq not in org_peptide_dict.keys():
                            org_peptide_dict[seq] = set()
                        org_peptide_dict[seq].add(spec_title)

        # merge identifications from each dataset into "all"
        for level in ['protein_groups', 'proteins', 'peptides', 'spectra']:
            results_dict['all'][level]['all'] |= results_dict[PRIDE_ID][level][
                'all']
        for results_type in [
                'original_results', '3engines_results', 'combined_results'
        ]:
            for level in ['peptides', 'spectra']:
                results_dict['all'][results_type][level][
                    'all'] |= results_dict[PRIDE_ID][results_type][level][
                        'all']
        for prot in results_dict[PRIDE_ID]['protein_dict'].keys():
            if prot not in results_dict['all']['protein_dict'].keys():
                results_dict['all']['protein_dict'][prot] = {'datasets': set()}
            results_dict['all']['protein_dict'][prot]['datasets'].add(PRIDE_ID)
            for seq in results_dict[PRIDE_ID]['protein_dict'][prot].keys():
                start_stop = results_dict[PRIDE_ID]['protein_dict'][prot][seq][
                    'start_stop']
                if seq not in results_dict['all']['protein_dict'][prot].keys():
                    results_dict['all']['protein_dict'][prot][seq] = {
                        'spec_title': [],
                        'bayes_pep': [],
                        'modifications': [],
                        'charge': [],
                        'psm_q_value': [],
                        'start_stop': start_stop,
                    }
                for k, v in results_dict[PRIDE_ID]['protein_dict'][prot][
                        seq].items():
                    if k == 'start_stop':
                        continue
                    results_dict['all']['protein_dict'][prot][seq][k].extend(v)

    # Calculate q-values
    # peptides first, then proteins
    for PRIDE_ID in results_dict.keys():
        # generate input dict for q_value calculation function
        seq_q_value_dict = {}
        for prot in results_dict[PRIDE_ID]['protein_dict'].keys():
            for seq in results_dict[PRIDE_ID]['protein_dict'][prot].keys():
                if seq in non_seq_keys:
                    continue
                seq_length = len(seq)
                if seq_length not in seq_q_value_dict.keys():
                    seq_q_value_dict[seq_length] = {}
                min_bayes_pep = min(results_dict[PRIDE_ID]['protein_dict']
                                    [prot][seq]['bayes_pep'])
                if 'decoy_' in prot:
                    is_decoy = True
                else:
                    is_decoy = False
                seq_q_value_dict[seq_length][seq] = {
                    'Bayes PEP': min_bayes_pep,
                    'Is decoy': is_decoy,
                }
        print('calculating q-values on peptide level')
        seq_calc_q_value_dict = calculate_q_value_by_group(seq_q_value_dict,
                                                           sliding=False)

        # read results from peptide q_value calc, at the same time
        # generate input dict for proteins for q_value calculation function
        prot_q_value_dict = {'seq_level': {}, 'psm_level': {}}
        for prot in results_dict[PRIDE_ID]['protein_dict'].keys():
            contaminants = False
            prot_id = prot.split(' ')[0]
            if 'HVO' not in prot_id:
                contaminants = True
            if 'decoy_' in prot:
                is_decoy = True
            else:
                is_decoy = False
            for seq in results_dict[PRIDE_ID]['protein_dict'][prot].keys():
                if seq in non_seq_keys:
                    continue
                seq_length = len(seq)
                seq_q_value = seq_calc_q_value_dict[seq_length][seq][
                    'combined PEP']
                results_dict[PRIDE_ID]['protein_dict'][prot][seq][
                    'seq_q_value'] = seq_q_value
                # NOTE(review): everything down to the end of this loop body
                # is taken to be inside this threshold check - confirm.
                if seq_q_value <= SEQ_Q_VALUE_THRESHOLD:
                    if PRIDE_ID == 'all':
                        if seq_length not in fdr_dict[
                                'peptides_seq_level'].keys():
                            fdr_dict['peptides_seq_level'][seq_length] = {}
                        fdr_dict['peptides_seq_level'][seq_length][seq] = (
                            seq_q_value, is_decoy)
                    counts = len(
                        set(results_dict[PRIDE_ID]['protein_dict'][prot][seq]
                            ['spec_title']))
                    if is_decoy is False and contaminants is False:
                        results_dict[PRIDE_ID]['peptides']['safe'].add(seq)
                    # NOTE(review): this check is placed beside (not inside)
                    # the decoy/contaminant check above so that decoys reach
                    # fdr_dict - confirm against the original nesting.
                    if counts >= num_specs:
                        results_dict[PRIDE_ID]['peptides'][
                            'safe_num_specs'].add(seq)
                        if PRIDE_ID == 'all':
                            if seq_length not in fdr_dict[
                                    'peptides_seq_level_2specs'].keys():
                                fdr_dict['peptides_seq_level_2specs'][
                                    seq_length] = {}
                            fdr_dict['peptides_seq_level_2specs'][
                                seq_length][seq] = (seq_q_value, is_decoy)

                    # sequence level: sum log10 of the best PEP per peptide
                    min_bayes_pep = min(results_dict[PRIDE_ID]['protein_dict']
                                        [prot][seq]['bayes_pep'])
                    if min_bayes_pep == 0.0:
                        # avoid log10(0)
                        min_bayes_pep = np.nextafter(0, 1)
                    log_seq_bayes = math.log10(min_bayes_pep)
                    if prot not in prot_q_value_dict['seq_level'].keys():
                        prot_q_value_dict['seq_level'][prot] = {
                            'Bayes PEP': log_seq_bayes,
                            'Is decoy': is_decoy,
                        }
                    else:
                        prot_q_value_dict['seq_level'][prot][
                            'Bayes PEP'] += log_seq_bayes

                    # PSM level: sum log10 of the PEP of every PSM.
                    # The previous code added log_seq_bayes here and never
                    # used the per-PSM value it had just computed.
                    for bayes_pep in results_dict[PRIDE_ID]['protein_dict'][prot][
                            seq]['bayes_pep']:
                        if bayes_pep == 0.0:
                            bayes_pep = np.nextafter(0, 1)
                        log_psm_bayes = math.log10(bayes_pep)
                        if prot not in prot_q_value_dict['psm_level'].keys():
                            prot_q_value_dict['psm_level'][prot] = {
                                'Bayes PEP': log_psm_bayes,
                                'Is decoy': is_decoy,
                            }
                        else:
                            prot_q_value_dict['psm_level'][prot][
                                'Bayes PEP'] += log_psm_bayes

        print('calculating q-values on protein level')
        prot_calc_q_value_dict = calculate_q_value_by_group(prot_q_value_dict,
                                                            sliding=False,
                                                            picked_fdr=True)

        # read results from protein q_value calc
        for prot in results_dict[PRIDE_ID]['protein_dict'].keys():
            contaminants = False
            prot_id = prot.split(' ')[0]
            if 'HVO' not in prot_id:
                contaminants = True
            if 'decoy_' in prot:
                is_decoy = True
            else:
                is_decoy = False
            for level in ['psm_level', 'seq_level']:
                if prot in prot_calc_q_value_dict[level].keys():
                    prot_q_value = prot_calc_q_value_dict[level][prot][
                        'combined PEP']
                    prot_bayes_pep = prot_calc_q_value_dict[level][prot][
                        'Bayes PEP']
                else:
                    prot_q_value = 1
                    prot_bayes_pep = 1

                # count number of spectra for each prot
                # (peptides with seq q-value > 1% are skipped)
                # collect samples for simple protein inference model
                counts = 0
                samples = set()
                for seq in results_dict[PRIDE_ID]['protein_dict'][prot].keys():
                    if seq in non_seq_keys:
                        continue
                    if results_dict[PRIDE_ID]['protein_dict'][prot][seq][
                            'seq_q_value'] > 0.01:
                        continue
                    psm_set = set(results_dict[PRIDE_ID]['protein_dict'][prot]
                                  [seq]['spec_title'])
                    counts += len(psm_set)
                    for psm in psm_set:
                        ms_filename = '.'.join(psm.split('.')[:-3])
                        samples.add(
                            ms_filename2sample.get(ms_filename, ms_filename))

                if PRIDE_ID == 'all':
                    if level == 'seq_level':
                        fdr_dict['proteins_seq_level'][prot] = (prot_bayes_pep,
                                                                is_decoy)
                        if counts >= num_specs:
                            fdr_dict['proteins_seq_level_2specs'][prot] = (
                                prot_bayes_pep, is_decoy)
                    else:
                        fdr_dict['proteins_psm_level'][prot] = (prot_bayes_pep,
                                                                is_decoy)

                if prot_q_value <= 0.01 and is_decoy is False and contaminants is False:
                    if level == 'seq_level':
                        if len(prot.split('<|>')) > 1:
                            results_dict[PRIDE_ID]['protein_groups'][
                                'safe_seq'].add(prot)
                            if counts >= num_specs:
                                results_dict[PRIDE_ID]['protein_groups'][
                                    'safe_seq_num_spec'].add(prot)
                                if prot_q_value <= PROT_Q_VALUE_THRESHOLD:
                                    results_dict[PRIDE_ID]['protein_groups'][
                                        'safe_seq_num_spec_0005'].add(prot)
                        else:
                            results_dict[PRIDE_ID]['proteins']['safe_seq'].add(
                                prot)
                            if counts >= num_specs:
                                results_dict[PRIDE_ID]['proteins'][
                                    'safe_seq_num_spec'].add(prot)
                                if prot_q_value <= PROT_Q_VALUE_THRESHOLD:
                                    results_dict[PRIDE_ID]['proteins'][
                                        'safe_seq_num_spec_0005'].add(prot)
                    elif counts >= num_specs:
                        if len(prot.split('<|>')) > 1:
                            results_dict[PRIDE_ID]['protein_groups'][
                                'safe_psm'].add(prot)
                        else:
                            results_dict[PRIDE_ID]['proteins']['safe_psm'].add(
                                prot)

                if level == 'seq_level':
                    results_dict[PRIDE_ID]['protein_dict'][prot][
                        'prot_q_value_seq'] = prot_q_value
                else:
                    results_dict[PRIDE_ID]['protein_dict'][prot][
                        'prot_q_value_psm'] = prot_q_value
                results_dict[PRIDE_ID]['protein_dict'][prot][
                    'samples'] = samples

        print(
            'Number of confident protein identifications for {0}: {1}'.format(
                PRIDE_ID,
                len(results_dict[PRIDE_ID]['proteins']
                    ['safe_seq_num_spec_0005'])))

    # save results in a pkl
    with open(pkl_name, 'wb') as pkl_file:
        pickle.dump(results_dict, pkl_file)
    print('pickled results: ', pkl_name)
    with open(fdr_pkl_name, 'wb') as fdr_pkl_file:
        pickle.dump(fdr_dict, fdr_pkl_file)
    print('pickled fdr_dict: ', fdr_pkl_name)
def sample(self, n, d=None, rng=np.random):
    """Draw shifted exponential samples bounded by ``self.shift`` and ``self.high``.

    Parameters
    ----------
    n, d
        Forwarded to ``self._sample_shape`` to determine the output shape.
    rng
        Random source providing ``exponential(scale, shape)``; defaults to
        ``np.random``.

    Returns
    -------
    The result of ``npext.clip`` applied to the draws, with ``self.shift``
    as lower limit and the float just below ``self.high`` as upper limit.
    """
    out_shape = self._sample_shape(n, d)
    draws = rng.exponential(self.scale, out_shape)
    draws = draws + self.shift

    # Step one representable value down from `high`, in the dtype of the
    # draws, so that the upper limit itself is never returned.
    toward = np.asarray(-np.inf, dtype=draws.dtype)
    upper = np.nextafter(self.high, toward)
    return npext.clip(draws, self.shift, upper)
def __init__(self, dtype, default_round, warp="linear", values=None, range_=None):
    """Set up the data type, warping functions, rounding and bounds of a `Space`.

    Meant to be called by child classes rather than directly, although
    `Space` itself is not abstract and can be instantiated.

    Exactly one of `values` and `range_` must be given. With `values`, the
    unique sorted values define both the range and a nearest-value rounding
    function; with `range_`, `default_round` is used for rounding.
    """
    self.dtype = dtype
    assert warp in WARP_DICT, "invalid space %s, allowed spaces are: %s" % (
        str(warp), str(WARP_DICT.keys()))
    self.warp_f = WARP_DICT[warp]
    self.unwarp_f = UNWARP_DICT[warp]

    # Either a list of allowed values or a range is supplied, never both
    assert (values is None) != (range_ is None)
    if range_ is None:
        # => values is not None.
        # Uniqueness is applied after the cast, which is the safer order
        # when the cast changes precision. Result is always a 1D ndarray.
        values = np.unique(np.asarray(values, dtype=dtype))
        check_array(
            values,
            "unique values",
            pre=True,
            ndim=1,
            dtype=dtype,
            min_size=2,
            allow_infinity=False,
            allow_nan=False,
        )
        # Numerics in type conversions may push inputs slightly outside the
        # values, hence extrapolation. Bounds are still checked in the
        # validate routines.
        rounder = interp1d(values, values, kind="nearest", fill_value="extrapolate")
        range_ = (values[0], values[-1])
    else:
        rounder = default_round

    # values is either None or was validated above
    self.values = values
    self.round_to_values = rounder

    # dtype=None is the default for asarray, so it is safe to pass through
    range_ = np.asarray(range_, dtype=dtype)
    check_array(range_, "range", pre=True, shape=(2, ), dtype=dtype, unsorted=False)
    self.lower, self.upper = range_

    # Bounds in warped space, followed by validation of each
    warped_bounds = self.warp_f(range_[..., None]).astype(WARPED_DTYPE, copy=False)
    self.lower_warped, self.upper_warped = warped_bounds
    check_array(
        self.lower_warped,
        "warped lower bound %s(%.1f)" % (warp, self.lower),
        ndim=1,
        pre=True,
        dtype=WARPED_DTYPE,
        allow_infinity=False,
        allow_nan=False,
    )
    # Should never happen if warpers are strictly monotonic:
    assert np.all(self.lower_warped <= self.upper_warped)

    # Keep the upper bound at least one float above the lower bound
    next_above_lower = np.nextafter(self.lower_warped, np.inf)
    self.upper_warped = np.maximum(self.upper_warped, next_above_lower)
    check_array(
        self.upper_warped,
        "warped upper bound %s(%.1f)" % (warp, self.upper),
        pre=True,
        shape=self.lower_warped.shape,
        dtype=WARPED_DTYPE,
        allow_infinity=False,
        allow_nan=False,
    )
    # Should never happen if warpers are strictly monotonic:
    assert np.all(self.lower_warped < self.upper_warped)
def _step_impl(self):
    """Attempt one adaptive explicit step and commit it to the solver state.

    The step computed by ``rk_step`` is retried with a smaller size until
    the scaled error norm drops below 1.

    Returns
    -------
    success : bool
        ``True`` when a step was accepted, ``False`` when the step size
        became smaller than ``min_step``.
    message : object or None
        ``self.TOO_SMALL_STEP`` on failure, ``None`` otherwise.
    """
    t, y = self.t, self.y
    rtol, atol = self.rtol, self.atol
    max_step = self.max_step

    # Ten times the gap between t and the next representable float in the
    # direction of integration.
    min_step = 10 * np.abs(np.nextafter(t, self.direction * np.inf) - t)

    # Start from the stored step size, clamped into [min_step, max_step].
    h_abs = self.h_abs
    if h_abs > max_step:
        h_abs = max_step
    elif h_abs < min_step:
        h_abs = min_step

    had_rejection = False
    while True:
        if h_abs < min_step:
            return False, self.TOO_SMALL_STEP

        t_new = t + h_abs * self.direction
        if self.direction * (t_new - self.t_bound) > 0:
            # Never step past the integration bound.
            t_new = self.t_bound

        h = t_new - t
        h_abs = np.abs(h)

        y_new, f_new = rk_step(self.fun, t, y, self.f, h, self.A,
                               self.B, self.C, self.K)
        scale = atol + np.maximum(np.abs(y), np.abs(y_new)) * rtol
        error_norm = self._estimate_error_norm(self.K, h, scale)

        if not error_norm < 1:
            # Rejected: shrink the step (by at least MIN_FACTOR) and retry.
            h_abs *= max(MIN_FACTOR,
                         SAFETY * error_norm**self.error_exponent)
            had_rejection = True
            continue

        # Accepted: choose the size of the next step.
        factor = (MAX_FACTOR if error_norm == 0 else
                  min(MAX_FACTOR, SAFETY * error_norm**self.error_exponent))
        if had_rejection:
            # Do not grow the step right after a rejection.
            factor = min(1, factor)
        h_abs *= factor
        break

    self.h_previous = h
    self.y_old = y

    self.t = t_new
    self.y = y_new

    self.h_abs = h_abs
    self.f = f_new

    return True, None
def fit(self, X):
    """Store the mean and variance of ``X`` along axis 0 on the instance.

    The stored variance is offset by ``np.nextafter(0, 1)`` (the smallest
    positive float), so a zero variance is stored as that tiny value.
    """
    tiny = np.nextafter(0, 1)
    self.mean = np.mean(X, axis=0)
    spread = np.var(X, axis=0)
    self.var = spread + tiny
def fitgalaxy(img, psfs, sigmainverse, band, modelspecs, mask=None, modellib=None, modellibopts=None,
              plot=False, name=None, models=None, fitsbyengine=None, redoall=True, ):
    """
    Fit every model listed in modelspecs to a single-band image.

    :param img: ndarray; 2D Image
    :param psfs: Collection of proutil.PSF object
    :param sigmainverse: ndarray; 2D Inverse sigma image ndarr
    :param band: string; Filter/passband name
    :param modelspecs: Model specifications as returned by getmodelspecs; element 0 holds one row per
        model and element 1 the column names used to index into each row.
    :param mask: ndarray; 2D Inverse mask image (1=include, 0=omit)
    :param modellib: string; Model fitting library
    :param modellibopts: dict; Model fitting library options. When None, a default algorithm is chosen
        per model and a second fit is only run for models with more than one component.
    :param plot: bool; Make plots?
    :param name: string; Name of the model for plot labelling
    :param models: dict; key=model type: value=previously built model. Ignored when redoall is True.
    :param fitsbyengine: dict; Previous return value to reuse. Ignored when redoall is True.
    :param redoall: bool; Refit every model, discarding the passed-in models and fitsbyengine.
    :return: fitsbyengine, models: tuple of complicated structures:
        fitsbyengine: dict; key=engine name: value=dict(key=model name: value=
            dict with keys "fits" (list of fit results) and "modeltype", or, if fitting raised,
            a tuple of (exception, traceback string))
        models: dict; the models argument, or an empty dict when it was None or redoall is True.
            NOTE(review): nothing in this function adds entries to it - confirm this is intended.
    """
    initfrommoments = dict(zip(["axrat", "ang", "re"], getellipseestimate(img.array)))
    engines = {
        "galsim": {"gsparams": gs.GSParams(kvalue_accuracy=1e-2, integration_relerr=1e-2,
                                           integration_abserr=1e-3, maximum_fft_size=16384)}
    }
    title = name if plot else None

    npiximg = np.flip(img.array.shape, axis=0)
    flux = np.sum(img.array[mask] if mask is not None else img.array)
    # Hard-coded upper limits applied to parameters with these names
    valuesmax = {
        "re": np.sqrt(np.sum((npiximg/2.)**2)),
        "flux": 10*np.sum(img.array),
    }
    # TODO: validate specs
    # Map from spec column name to its index within each model row
    specs = {specname: idx for idx, specname in enumerate(modelspecs[1])}
    models = {} if (models is None) or redoall else models
    paramsfixeddefault = {}
    fitsbyengine = {} if ((models is None) or (fitsbyengine is None) or redoall) else fitsbyengine
    usemodellibdefault = modellibopts is None
    for engine, engineopts in engines.items():
        if (engine not in fitsbyengine) or redoall:
            fitsbyengine[engine] = {}
        fitsengine = fitsbyengine[engine]
        if plot:
            nrows = len(modelspecs[0])
            # Change to landscape
            figure, axes = plt.subplots(nrows=min([5, nrows]), ncols=max([5, nrows]))
            if nrows > 5:
                axes = np.transpose(axes)
            # This keeps things consistent with the nrows>1 case
            if nrows == 1:
                axes = np.array([axes])
            # name may be None even when plotting; str + None would raise TypeError
            if title is not None:
                plt.suptitle(title + " {} model".format(engine))
            flipplot = nrows > 5
        else:
            figure = None
            axes = None
            flipplot = None
        for modelidx, modelinfo in enumerate(modelspecs[0]):
            modelname = modelinfo[specs["name"]]
            modeltype = modelinfo[specs["model"]]
            modeldefault = proutil.getmodel(
                {band: flux}, modeltype, npiximg, engine=engine, engineopts=engineopts
            )
            # Remember the default fixed flags so spec overrides can be undone before each fit
            paramsfixeddefault[modeltype] = [param.fixed for param in
                                             modeldefault.getparameters(fixed=True)]
            model = modeldefault if (redoall or modeltype not in models) else models[modeltype]
            psfname = modelinfo[specs["psfmodel"]] + ("_pixelated" if proutil.str2bool(
                modelinfo[specs["psfpixel"]]) else "")
            proutil.setexposure(model, band, image=img.array, sigmainverse=sigmainverse,
                                psf=psfs[psfname]["object"], mask=mask)
            if not redoall and (modelname in fitsbyengine[engine]):
                # Already fit: only re-evaluate the stored best fit for plotting
                if plot:
                    valuesbest = fitsengine[modelname]['fits'][-1]['paramsbestalltransformed']
                    # TODO: consider how to avoid code repetition here and below
                    modeldescs = {x: [] for x in ['f', 'n', 'r']}
                    formats = {x: '{:.1f}' if x == 'r' else '{:.2f}' for x in ['f', 'n', 'r']}
                    for param, value in zip(model.getparameters(fixed=True), valuesbest):
                        param.setvalue(value, transformed=True)
                        if param.name == "nser":
                            modeldescs['n'].append(param)
                        elif param.name == "re":
                            modeldescs['r'].append(param)
                        elif isfluxratio(param) and param.getvalue(transformed=False) < 1:
                            modeldescs['f'].append(param)
                    modeldescs = [
                        paramname + '=' + ','.join(
                            [formats[paramname].format(param.getvalue(transformed=False))
                             for param in params])
                        for paramname, params in modeldescs.items() if params
                    ]
                    modeldescs = ';'.join(modeldescs)
                    if title is not None:
                        plt.suptitle(title)
                    model.evaluate(plot=plot, modelname=modelname,
                                   modeldesc=modeldescs if modeldescs else None,
                                   figure=figure, axes=axes, figurerow=modelidx, flipplot=flipplot)
                    plt.show(block=False)
            else:
                inittype = modelinfo[specs["inittype"]]
                if inittype == "moments":
                    for param in model.getparameters(fixed=False):
                        if param.name in initfrommoments:
                            param.setvalue(initfrommoments[param.name], transformed=False)
                else:
                    # TODO: Refactor into function
                    if inittype.startswith("best"):
                        if inittype == "best":
                            # Candidates: all earlier models of the same type
                            modelnamecomps = []
                            for modelidxcomp in range(modelidx):
                                modelinfocomp = modelspecs[0][modelidxcomp]
                                if modelinfocomp[specs["model"]] == modeltype:
                                    modelnamecomps.append(modelinfocomp[specs['name']])
                        else:
                            # TODO: Check this more thoroughly
                            modelnamecomps = inittype.split(":")[1].split(";")
                            print(modelnamecomps)
                        # Pick the candidate whose last fit has the lowest reduced chi-squared.
                        # np.inf rather than the np.Inf alias, which NumPy 2.0 removed.
                        chisqredbest = np.inf
                        for modelnamecomp in modelnamecomps:
                            chisqred = fitsbyengine[engine][modelnamecomp]["fits"][-1]["chisqred"]
                            if chisqred < chisqredbest:
                                chisqredbest = chisqred
                                inittype = modelnamecomp
                    else:
                        inittype = inittype.split(';')
                        if len(inittype) > 1:
                            # Initialize from several previous fits at once
                            modelfits = [{
                                'paramvals': fitsengine[initname]['fits'][-1]['paramsbestall'],
                                'paramtree': models[fitsengine[initname]['modeltype']].getparameters(
                                    fixed=True, flatten=False),
                                'params': models[fitsengine[initname]['modeltype']].getparameters(
                                    fixed=True),
                                'chisqred': fitsengine[initname]['fits'][-1]['chisqred'],
                                'modeltype': fitsengine[initname]['modeltype']}
                                for initname in inittype
                            ]
                            initmodelfrommodelfits(model, modelfits)
                            inittype = None
                        else:
                            inittype = inittype[0]
                            if inittype not in fitsbyengine[engine]:
                                # TODO: Fail or fall back here?
                                raise RuntimeError("Model {} can't find reference {} "
                                                   "to initialize from".format(modelname, inittype))
                    if inittype:
                        paramvalsinit = fitsbyengine[engine][inittype]["fits"][-1]["paramsbestall"]
                        for param, value in zip(model.getparameters(fixed=True), paramvalsinit):
                            param.setvalue(value, transformed=False)

                # Reset parameter fixed status
                for param, fixed in zip(model.getparameters(fixed=True),
                                        paramsfixeddefault[modeltype]):
                    param.fixed = fixed
                # Parse default overrides from model spec
                paramflags = {}
                for flag in ["fixedparams", "initparams"]:
                    paramflags[flag] = {}
                    values = modelinfo[specs[flag]]
                    if values:
                        for flagvalue in values.split(";"):
                            if flag == "fixedparams":
                                paramflags[flag][flagvalue] = None
                            elif flag == "initparams":
                                value = flagvalue.split("=")
                                # TODO: sort this out
                                # Builtin float: the np.float alias was removed in NumPy 1.24
                                valuesplit = [float(x) for x in value[1].split(',')]
                                paramflags[flag][value[0]] = valuesplit
                # For printing parameter values when plotting
                modelnameappendparams = []
                # Now actually apply the overrides and the hardcoded maxima
                # Counts how many parameters of each name have consumed an init value so far
                timesmatched = {}
                for param in model.getparameters(fixed=True):
                    if param.name in paramflags["fixedparams"]:
                        param.fixed = True
                    if param.name in paramflags["initparams"]:
                        if param.name not in timesmatched:
                            timesmatched[param.name] = 0
                        param.setvalue(paramflags["initparams"][param.name][timesmatched[param.name]],
                                       transformed=False)
                        timesmatched[param.name] += 1
                    isfluxrat = isfluxratio(param)
                    if plot and not param.fixed:
                        if param.name == "nser":
                            modelnameappendparams += [("n={:.2f}", param)]
                        elif isfluxrat:
                            modelnameappendparams += [("f={:.2f}", param)]
                    if param.name in valuesmax and not isfluxrat:
                        transform = param.transform.transform
                        param.limits = proobj.Limits(lower=transform(0),
                                                     upper=transform(valuesmax[param.name]),
                                                     transformed=True)
                    # Reset non-finite free param values
                    # This occurs e.g. at the limits of a logit transformed param
                    if not param.fixed:
                        paramval = param.getvalue(transformed=True)
                        if not np.isfinite(paramval):
                            # NOTE(review): the nextafter target is -1 when the transformed value is
                            # negative and +1 otherwise; confirm this moves the untransformed value
                            # back inside the valid range for every transform in use.
                            param.setvalue(
                                np.nextafter(param.getvalue(transformed=False),
                                             (-1) ** (paramval < 0)),
                                transformed=False)

                print("Fitting model {:s} of type {:s} using engine {:s}".format(
                    modelname, modeltype, engine))
                sys.stdout.flush()
                try:
                    fits = []
                    dosecond = (len(model.sources[0].modelphotometric.components) > 1) or \
                        not usemodellibdefault
                    if usemodellibdefault:
                        modellibopts = {
                            "algo": ("cobyla" if modellib == "pygmo" else "COBYLA") if dosecond else
                                    ("neldermead" if modellib == "pygmo" else "Nelder-Mead")
                        }
                        if modellib == "scipy":
                            modellibopts['options'] = {'maxfun': 1e4}
                    fit1, modeller = proutil.fitmodel(
                        model, modellib=modellib, modellibopts=modellibopts, printfinal=True,
                        printsteps=100, plot=plot and not dosecond, figure=figure, axes=axes,
                        figurerow=modelidx, flipplot=flipplot, modelname=modelname,
                        modelnameappendparams=modelnameappendparams
                    )
                    fits.append(fit1)
                    if dosecond:
                        if usemodellibdefault:
                            modeller.modellibopts["algo"] = "neldermead" if modellib == "pygmo" else \
                                "Nelder-Mead"
                        fit2, _ = proutil.fitmodel(
                            model, modeller, printfinal=True, printsteps=100, plot=plot,
                            figure=figure, axes=axes, figurerow=modelidx, flipplot=flipplot,
                            modelname=modelname, modelnameappendparams=modelnameappendparams)
                        fits.append(fit2)
                    fitsbyengine[engine][modelname] = {"fits": fits, "modeltype": modeltype}
                except Exception as e:
                    # Best effort: record the failure and carry on with the next model.
                    # Uses the name argument; the previous code formatted a name that is not
                    # defined in this function, which would raise inside this handler.
                    print("Error fitting id={}:".format(name))
                    print(e)
                    trace = traceback.format_exc()
                    print(trace)
                    fitsbyengine[engine][modelname] = e, trace
        if plot:
            plt.show(block=False)
            plt.tight_layout()
            plt.subplots_adjust(wspace=0.05, hspace=0.05)
            plt.show(block=False)

    return fitsbyengine, models
def check_adv(args, device, gap, mat_model_path, model_class, inp, adv_inp,
              label, eps, *, allow_retry=2):
    """Check an adversarial input against a model and verify the original.

    Unless ``args.no_gap`` is set, a copy of the model is written to a
    temporary file with ``gap`` added to the ``softmax/bias`` entry of
    ``label``. The (possibly modified) model must misclassify ``adv_inp``,
    and ``MIPVerify`` is then run on ``inp`` within radius ``eps``.

    If verification does not report a known, robust result, the call may
    retry itself with a slightly larger ``gap`` up to ``allow_retry`` times.

    :param args: options object; reads ``no_gap``, ``time_limit``,
        ``stable`` and ``mm``
    :param device: device the model and inputs are moved to
    :param gap: value added to the bias of ``label``
    :param mat_model_path: path of the model ``.mat`` file
    :param model_class: model class providing ``from_mat``, ``input_size``
        and ``input_chl``
    :param inp: original input as a numpy array
    :param adv_inp: adversarial input as a numpy array with values in [0, 1]
    :param label: class index of ``inp``
    :param eps: maximum allowed absolute difference between ``inp`` and
        ``adv_inp``
    :param allow_retry: number of retries left
    :return: dict describing the verified case, or None when verification
        failed and no retry produced a result
    :raises AssertionError: if ``adv_inp`` is out of range, differs from
        ``inp`` by more than ``eps``, or is classified as ``label``
    """
    err = np.abs(inp - adv_inp).max()
    assert 0 <= adv_inp.min() <= adv_inp.max() <= 1
    assert err <= eps, (err, eps, err - eps)

    with tempfile.NamedTemporaryFile() as modified_model:
        if args.no_gap:
            modified_model_name = mat_model_path
        else:
            modified_model_name = modified_model.name
            params = read_mat_file(mat_model_path)
            params['softmax/bias'][0, label] += gap
            sio.savemat(modified_model_name, params)

        mip_verify = MIPVerify(
            modified_model_name,
            (model_class.input_size, model_class.input_chl),
            args.time_limit,
        )
        try:
            model = (model_class.
                     from_mat(modified_model_name).
                     to(device).
                     use_unstable_conv(not args.stable))
            adv_inp_dev = torch.from_numpy(adv_inp).to(device)
            out0 = torch_as_npy(model(adv_inp_dev)).flatten()
            out1 = torch_as_npy(model.features_chk(adv_inp_dev)).flatten()
            test_acc = eval_acc(model, device)
            lprint(f'conv {out0} l={np.argmax(out0)} '
                   f'cw={cw_loss_vec(out0, label):.2e}')
            lprint(f'mm {out1} l={np.argmax(out1)} '
                   f'cw={cw_loss_vec(out1, label):.2e}')
            lprint(f'test acc: {test_acc*100:.2f}%')
            if args.mm:
                out_adv = out1
            else:
                out_adv = out0
            assert np.argmax(out_adv) != label

            v = mip_verify(inp, label, eps)
            if not (v['status_known'] and v['robust']):
                # Drop the large arrays before logging the result
                v.pop('PerturbationValue', None)
                v.pop('PerturbedInputValue', None)
                lprint('verification of original model failed:',
                       pprint.pformat(v))
                if allow_retry > 0 and v['status_known']:
                    try:
                        # try to increase robustness a little bit
                        safe = -cw_loss_vec(out_adv, label)
                        gap += min(-v['ObjectiveValue'],
                                   max(safe - 1e-7, safe * 0.99))
                        # Round up to the next float32 above the new gap
                        gap = float(np.nextafter(gap, float('inf'),
                                                 dtype=np.float32))
                        lprint(f'retrying with new gap {gap} ...')
                        return check_adv(
                            args, device, gap, mat_model_path, model_class,
                            inp, adv_inp, label, eps,
                            allow_retry=allow_retry-1)
                    except Exception:
                        # A failed retry is reported but not fatal. Unlike a
                        # bare ``except``, this lets KeyboardInterrupt and
                        # SystemExit propagate.
                        traceback.print_exc()
                return
        finally:
            mip_verify.stop()

        inp_dev = torch.from_numpy(inp).to(device)
        save_state = {
            'inp': inp,
            'adv_inp': adv_inp,
            'label': label,
            'gap': gap,
            'eps': eps,
            'verify': v,
            'test_acc': test_acc,
            'adv_out_score': out0,
            'adv_out_score_mm': out1,
            'inp_out_score': torch_as_npy(model(inp_dev)).flatten(),
            'mat_model_path': mat_model_path,
            'device': device,
            'argv_options': [i for i in sys.argv if i.startswith('-')],
        }
        return save_state
def compute_scales_fun(variance, mean):
    """Return ``mean**2 / (variance - mean)`` with a floored denominator.

    The denominator is clamped from below (via ``np.fmax``) to the square
    root of the smallest positive value of ``variance.dtype``, so it is
    always strictly positive.
    """
    excess = variance - mean
    floor = np.sqrt(np.nextafter(0, 1, dtype=variance.dtype))
    return np.square(mean) / np.fmax(excess, floor)
def compute_mi_cd(c, d, n_neighbors=3):
    """Compute a nearest-neighbour mutual information score for ``c`` and ``d``.

    For every label in ``d`` with more than one sample, the distance from
    each sample to its k-th nearest neighbour with the same label is found
    with a ball tree. The number of samples (of any label) within just under
    that distance is then counted with a KD tree, and the counts are combined
    through digamma terms.

    Parameters
    ----------
    c : ndarray
        Values reshaped internally to a single column of shape (n, 1).
    d : ndarray
        Labels; flattened with ``ravel`` before comparison.
    n_neighbors : int, default 3
        Number of neighbours; capped per label at ``count - 1``.

    Returns
    -------
    float
        The score, clamped from below at 0.
    """
    leaf_size = 30
    c = c.reshape((-1, 1))
    n_samples, n_features = c.shape

    radius = np.empty(n_samples)
    label_counts = np.empty(n_samples, dtype=np.int64)
    k_all = np.empty(n_samples, dtype=np.int8)

    d_flat = d.ravel()
    for label in np_unique(d):
        mask = np.where(d_flat == label)[0]
        count = mask.shape[0]
        if count > 1:
            # create the objects that are going to be needed for NN
            n_levels = 1 + np.log2(max(1, ((count - 1) // leaf_size)))
            # Round before truncating, as for the KD tree below: the float
            # power can land just under the intended integer.
            n_nodes = int(round(2**n_levels)) - 1

            # allocate arrays for storage
            idx_array = np.arange(count)
            node_radius = np.zeros(n_nodes, dtype=np.float64)
            node_idx_start = np.zeros(n_nodes, dtype=np.int64)
            node_idx_end = np.zeros(n_nodes, dtype=np.int64)
            node_is_leaf = np.zeros(n_nodes, dtype=np.int64)
            node_centroids = np.zeros((n_nodes, n_features), dtype=np.float64)

            ball_tree.recursive_build(0, 0, count, c[mask], node_centroids,
                                      node_radius, idx_array, node_idx_start,
                                      node_idx_end, node_is_leaf, n_nodes,
                                      leaf_size, metric=0)

            # This algorithm returns the point itself as a neighbor, so
            # if n_neighbors need to be returned then '1' needs to be
            # added to 'k' in order to get the correct value from 'nth'
            # neighbor when the heap is created
            k = min(n_neighbors, count - 1)
            heap_distances, heap_indices = ball_tree.heap_create(count, k + 1)
            ball_tree.query(0, c[mask], heap_distances, heap_indices, c[mask],
                            idx_array, node_centroids, node_radius,
                            node_is_leaf, node_idx_start, node_idx_end,
                            metric=0)
            ball_tree.heap_sort(heap_distances, heap_indices)
            heap_distances = np.sqrt(heap_distances)

            # Step just below the k-th neighbour distance, toward zero
            radius[mask] = np.nextafter(heap_distances[:, -1], 0)
            k_all[mask] = k
        label_counts[mask] = count

    # Ignore points with unique labels. Select only the indices whose label
    # occurs more than once; radius and k_all were never filled for the
    # others, so they must not be read.
    mask_unique = np.where(label_counts > 1)[0]

    # A whole new set of Tree elements need to be created since the entire
    # data set is now going to be run through the algorithm
    n_samples_kd = c[mask_unique].shape[0]

    # determine number of levels in the tree, and from this
    # the number of nodes in the tree. This results in leaf nodes
    # with numbers of points between leaf_size and 2 * leaf_size
    n_levels_kd = 1 + np.log2(max(1, ((n_samples_kd - 1) // leaf_size)))

    # having to round first and then apply int in order to calculate
    # correct number of nodes
    n_nodes_kd = int(round((2**n_levels_kd))) - 1

    # allocate arrays for storage
    idx_array_kd = np.arange(n_samples_kd)
    node_radius_kd = np.zeros(n_nodes_kd, dtype=np.float64)
    node_idx_start_kd = np.zeros(n_nodes_kd, dtype=np.int64)
    node_idx_end_kd = np.zeros(n_nodes_kd, dtype=np.int64)
    node_is_leaf_kd = np.zeros(n_nodes_kd, dtype=np.int64)
    node_lower_bounds_kd = np.zeros((n_nodes_kd, n_features),
                                    dtype=np.float64)
    node_upper_bounds_kd = np.zeros((n_nodes_kd, n_features),
                                    dtype=np.float64)

    kd_tree.recursive_build(0, 0, n_samples_kd, c[mask_unique],
                            node_lower_bounds_kd, node_upper_bounds_kd,
                            node_radius_kd, idx_array_kd, node_idx_start_kd,
                            node_idx_end_kd, node_is_leaf_kd, n_nodes_kd,
                            leaf_size)

    count_only = True
    return_distance = False
    counts = kd_tree.radius_neighbors_count(
        c[mask_unique], radius[mask_unique], idx_array_kd,
        node_lower_bounds_kd, node_upper_bounds_kd, node_radius_kd,
        node_is_leaf_kd, node_idx_start_kd, node_idx_end_kd,
        count_only, return_distance)

    mi = (digamma_cpu(n_samples_kd)
          + np.mean(digamma_cpu(k_all[mask_unique]))
          - np.mean(digamma_cpu(label_counts[mask_unique]))
          - np.mean(digamma_cpu(counts)))
    mi = max(0, mi)
    return mi
def test_ConsequenceFunction_sample_unit_DV():
    """
    Check that sample_unit_DV draws from the DV distribution as configured.

    The sampling algorithm itself is covered by the uq module tests, so the
    samples are only compared loosely (mean, standard deviation, limits)
    against a reference truncated normal, to catch input-handling mistakes
    that would noticeably distort the results.
    """
    quantities = [0.5, 1.0, 1.5, 2.0, 2.5]
    tags = ['A', 'B', 'C']
    families = ['normal', 'normal', 'lognormal']

    # three correlated decision variables
    dims = 3
    ref_mean = [1., 1., 0.]
    ref_std = [0.4, 0.3, 0.2]
    ref_rho = np.ones((dims, dims)) * 0.8
    np.fill_diagonal(ref_rho, 1.0)
    ref_mean[2] = np.exp(ref_mean[2])

    # every variable is truncated at 0 from below...
    tr_lower = np.zeros(dims).tolist()
    # ...and only the second one from above, at 2 sigma
    tr_upper = [np.inf, 1.6, np.inf]

    RV_reg = RandomVariableRegistry()
    for rv_name, family, theta, beta, low, high in zip(
            tags, families, ref_mean, ref_std, tr_lower, tr_upper):
        RV_reg.add_RV(
            RandomVariable(name=rv_name, distribution=family,
                           theta=[theta, beta],
                           truncation_limits=[low, high]))

    RV_reg.add_RV_set(
        RandomVariableSet('set_A', [RV_reg.RV[rv] for rv in tags], ref_rho))
    RV_reg.generate_samples(sample_size=1000)

    # sample each decision variable in turn
    for r_i, tag in enumerate(tags):
        # 'B' gets a constant median, the other two a bounded linear one
        if tag == 'B':
            f_median = prep_constant_median_DV(10.)
        else:
            f_median = prep_bounded_linear_median_DV(median_max=20.0,
                                                     median_min=2.0,
                                                     quantity_lower=1.0,
                                                     quantity_upper=2.0)

        conseq_function = ConsequenceFunction(DV_median=f_median,
                                              DV_distribution=RV_reg.RV[tag])

        sigma = ref_std[r_i]
        for qnt in quantities:
            samples = conseq_function.sample_unit_DV(quantity=qnt,
                                                     sample_size=1000)

            if tag == 'C':
                # compare the lognormal variable in log space
                samples = np.log(samples)
                ref_mu = np.log(f_median(qnt))
                tiny = np.nextafter(0, 1)
                ref_min = np.log(max(tiny, tr_lower[r_i]))
                log_upper = np.log(max(tiny, tr_upper[r_i]))
                log_center = np.log(ref_mean[r_i])
                a = (ref_min - log_center) / sigma
                b = (log_upper - log_center) / sigma
                ref_scale = sigma
            else:
                ref_mu = f_median(qnt)
                ref_min = tr_lower[r_i]
                a = (ref_min - ref_mean[r_i]) / sigma
                b = (tr_upper[r_i] - ref_mean[r_i]) / sigma
                ref_scale = sigma * ref_mu
            ref_max = ref_mu * b

            trNorm = truncnorm(a=a, b=b, loc=ref_mu, scale=ref_scale)
            ref_samples = trNorm.rvs(size=1000)

            # means and spreads agree within loose tolerances
            assert np.mean(samples) == pytest.approx(np.mean(ref_samples),
                                                     rel=0.1)
            assert np.std(samples) == pytest.approx(np.std(ref_samples),
                                                    rel=0.15)

            # samples stay strictly inside the limits
            assert np.min(samples) > ref_min
            assert np.max(samples) < ref_max