Exemplo n.º 1
0
    def json_array_numpy(self, s_and_end, scan_once, **kwargs):
        """Parse a JSON array, returning it as a numpy array if it looks
        purely numeric.

        Parsing is delegated to `json.decoder.JSONArray`. A non-empty result
        is converted with `np.array` only if every inspected element passes
        `isbarenumeric`; otherwise the parsed list is returned as-is.

        Parameters
        ----------
        s_and_end, scan_once, **kwargs
            Passed through unchanged to `json.decoder.JSONArray`.

        Returns
        -------
        values : list or numpy.ndarray
            The parsed array; an ndarray if the numeric check passed, else
            the list produced by `json.decoder.JSONArray`.
        end
            Second element of the `json.decoder.JSONArray` return value,
            passed through unchanged.

        """
        values, end = json.decoder.JSONArray(s_and_end, scan_once, **kwargs)
        if not values:
            return values, end

        # TODO: is it faster to convert to numpy array and check if the
        # resulting dtype is pure numeric?

        # For long arrays only every (len // 1000)-th element is inspected to
        # bound the cost of the check. The stride is always >= 1 here since
        # this branch is only taken when len(values) > 1000.
        if len(values) > 1000:
            check_values = values[::len(values) // 1000]
        else:
            check_values = values

        # Generator (not a list) so that `all` stops at the first element
        # that is not bare-numeric
        if not all(isbarenumeric(v) for v in check_values):
            return values, end

        return np.array(values), end
Exemplo n.º 2
0
def llh(actual_values, expected_values):
    """Compute the log-likelihoods (llh) that each count in `actual_values`
    came from the corresponding expected value in `expected_values`.

    Parameters
    ----------
    actual_values, expected_values : numpy.ndarrays of same shape

    Returns
    -------
    llh : numpy.ndarray of same shape as the inputs
        llh corresponding to each pair of elements in `actual_values` and
        `expected_values`.

    Raises
    ------
    ValueError
        If the shapes of the inputs differ, if `actual_values` fails the
        ">= 0 and finite" check, or if any `expected_values` are negative.

    Notes
    -----
    * Uncertainties are not propagated through this calculation.
    * Values in `expected_values` are clipped to the range [SMALL_POS, inf]
      prior to the calculation to avoid infinities due to the log function.

    """
    # Raise explicitly (rather than `assert`) so the check survives `python -O`
    if actual_values.shape != expected_values.shape:
        raise ValueError('Shape mismatch: actual_values.shape = %s,'
                         ' expected_values.shape = %s' %
                         (actual_values.shape, expected_values.shape))

    # Convert to simple numpy arrays containing floats
    if not isbarenumeric(actual_values):
        actual_values = unp.nominal_values(actual_values)
    if not isbarenumeric(expected_values):
        expected_values = unp.nominal_values(expected_values)

    with np.errstate(invalid='ignore'):
        # Mask off any nan expected values (these are assumed to be ok)
        actual_values = np.ma.masked_invalid(actual_values)
        expected_values = np.ma.masked_invalid(expected_values)

        # TODO: How should we handle nan / masked values in the "data"
        # (actual_values) distribution? How about negative numbers?

        # Make sure actual values (aka "data") are valid -- no infs, no nans,
        # etc.
        if np.any((actual_values < 0) | ~np.isfinite(actual_values)):
            msg = (
                '`actual_values` must be >= 0 and neither inf nor nan...\n' +
                maperror_logmsg(actual_values))
            raise ValueError(msg)

        # Check that new array contains all valid entries
        if np.any(expected_values < 0.0):
            msg = ('`expected_values` must all be >= 0...\n' +
                   maperror_logmsg(expected_values))
            raise ValueError(msg)

        # Replace 0's with small positive numbers to avoid inf in log
        np.clip(expected_values,
                a_min=SMALL_POS,
                a_max=np.inf,
                out=expected_values)

    #
    # natural logarithm of the Poisson probability
    # (uses Stirling's approximation to estimate ln(k!) ~ kln(k)-k)
    #
    # NOTE(review): for entries where actual_values == 0 the second line
    # evaluates log(0); confirm the resulting entries are treated as intended
    # by callers.
    llh_val = actual_values * np.log(expected_values) - expected_values
    llh_val -= actual_values * np.log(actual_values) - actual_values

    return llh_val
Exemplo n.º 3
0
def chi2(actual_values, expected_values):
    """Compute the element-wise chi-square between `actual_values` and
    `expected_values`, i.e. (actual - expected)**2 / expected.

    Parameters
    ----------
    actual_values, expected_values : numpy.ndarrays of same shape

    Returns
    -------
    chi2 : numpy.ndarray of same shape as inputs
        chi-squared values corresponding to each pair of elements in the
        inputs; all zeros if every difference is smaller in magnitude than
        5 * FTYPE_PREC.

    Raises
    ------
    ValueError
        If the input shapes differ or if either input contains negative
        values.

    Notes
    -----
    * Uncertainties are not propagated through this calculation.
    * Values in expectation are clipped to the range [SMALL_POS, inf] prior to
      the calculation to avoid infinities due to the divide function.
    * actual_values are allowed to be = 0, since they don't come up in the
      denominator
    """
    actual_shape = actual_values.shape
    expected_shape = expected_values.shape
    if actual_shape != expected_shape:
        raise ValueError('Shape mismatch: actual_values.shape = %s,'
                         ' expected_values.shape = %s' %
                         (actual_shape, expected_shape))

    # Strip anything that is not a bare number down to its nominal value
    if not isbarenumeric(actual_values):
        actual_values = unp.nominal_values(actual_values)
    if not isbarenumeric(expected_values):
        expected_values = unp.nominal_values(expected_values)

    with np.errstate(invalid='ignore'):
        # Invalid (e.g. nan) entries are masked rather than rejected
        actual_values = np.ma.masked_invalid(actual_values)
        expected_values = np.ma.masked_invalid(expected_values)

        # TODO: these negativity checks should probably be done elsewhere...
        # maybe?
        for label, arr in (('actual_values', actual_values),
                           ('expected_values', expected_values)):
            if np.any(arr < 0):
                raise ValueError(
                    '`%s` must all be >= 0...\n' % label
                    + maperror_logmsg(arr)
                )

        # TODO: Is this okay to do? Mathematically suspect at best, and can
        #       still destroy a minimizer's hopes and dreams...

        # Keep the denominator strictly positive so the division below cannot
        # produce inf
        np.clip(expected_values, SMALL_POS, np.inf, out=expected_values)

        residual = actual_values - expected_values

    # Differences below the float-precision threshold count as exact agreement
    negligible = np.abs(residual) < 5 * FTYPE_PREC
    if np.all(negligible):
        return np.zeros_like(residual, dtype=FTYPE)

    result = np.square(residual) / expected_values
    assert np.all(result >= 0), str(result[result < 0])
    return result
Exemplo n.º 4
0
def get_prior_bounds(obj, param=None, stddev=1.0):
    """Find where a parameter prior's chi2 crosses the level(s) `stddev`**2.

    The prior's valid range is sampled at 10000 evenly spaced points; for each
    requested `stddev`, every sample adjacent to a crossing of the chi2 level
    `stddev`**2 (on the side where chi2 is below the level) is recorded.

    Parameters
    ----------
    obj : string or Mapping
        if str, interpret as path from which to load a dict
        if dict, can be:
            template settings dict; must supply `param` to choose which to plot
            params dict; must supply `param` to choose which to plot
            prior dict

    param : Param
        Name of param for which to get bounds;
        necessary if obj is either template settings or params

    stddev : float or Iterable of floats
        number of stddevs


    Returns
    -------
    bounds : OrderedDict
        A dictionary mapping the passed `stddev` values to the corresponding
        bounds (a list of crossing points per `stddev` value)

    """
    # Normalize `stddev` to a list so a scalar and a sequence are handled
    # alike
    if isbarenumeric(stddev):
        stddev = [stddev]
    elif isinstance(stddev, Iterable):
        stddev = list(stddev)

    bounds = OrderedDict((n_sigma, []) for n_sigma in stddev)

    if isinstance(obj, basestring):
        obj = from_file(obj)

    # Drill down through the (optional) nesting levels to the prior spec
    if 'params' in obj:
        obj = obj['params']
    if param is not None and param in obj:
        obj = obj[param]
    if 'prior' in obj:
        obj = obj['prior']

    prior = Prior(**obj)
    logging.debug('Getting confidence region from prior: %s', prior)

    lower, upper = prior.valid_range[0], prior.valid_range[1]
    grid = ureg.Quantity(np.linspace(lower, upper, 10000), prior.units)
    chi2_vals = prior.chi2(grid)

    levels = [(n_sigma, n_sigma**2) for n_sigma in stddev]
    for idx, left_pt in enumerate(grid[:-1]):
        chi2_left = chi2_vals[idx]
        chi2_right = chi2_vals[idx + 1]
        for n_sigma, level in levels:
            if chi2_left > level and chi2_right < level:
                # Falling through the level: the left point is the bound
                bounds[n_sigma].append(left_pt)
            elif chi2_left < level and chi2_right > level:
                # Rising through the level: the right point is the bound
                bounds[n_sigma].append(grid[idx + 1])
    return bounds