Пример #1
0
def _std_moment(data, m, s, r):
    """Return ``(n, total)`` for the standardised moment of order r = 1...4.

    Every item ``x`` of ``data`` is mapped to ``((x - m)/s)**r`` through
    ``v.apply`` and the resulting terms are handed to ``_len_sum``. The
    pair that ``_len_sum`` returns is passed back unchanged (presumably
    the number of items and the sum of the terms -- confirm against
    ``_len_sum``); no division by ``n`` is performed here.

    Arguments:

    data -- iterable of data points. It is copied into a list only when
            ``m`` or ``s`` has to be calculated, because that requires
            more than one pass over the data.
    m    -- the centre to subtract; if None, ``mean(data)`` is used.
    s    -- the scale to divide by; if None, ``pstdev(data, m)`` is used
            (with the freshly calculated ``m`` when that was None too).
    r    -- the order of the moment; must be 1, 2, 3 or 4.

    """
    assert r in (1, 2, 3, 4), "private function not intended for r != 1...4"
    if m is None or s is None:
        # Calculating the missing centre and/or scale consumes the data, and
        # it is iterated once more below, so an iterator must be saved first.
        if not isinstance(data, list):
            data = list(data)
        if m is None:
            m = mean(data)
        if s is None:
            s = pstdev(data, m)

    # NOTE: a fast path that skipped the division when s == 1 used to sit
    # here behind an ``if False and s == 1`` guard, so it could never run.
    # The unreachable code has been removed; if that optimisation is wanted
    # again it has to be reintroduced (and tested) deliberately.
    def _term(x, m, s, r):
        return ((x - m)/s)**r

    # m, s and r are passed through v.apply rather than closed over, exactly
    # as the original did, so v.apply keeps control of how they are combined
    # with each data item.
    n, total = _len_sum(v.apply(_term, x, m, s, r) for x in data)
    return (n, total)
Пример #2
0
def average_deviation(data, m=None):
    """average_deviation(data [, m]) -> average absolute deviation of data.

    Compute the mean of the absolute differences between each data point
    and a centre ``m`` (typically the mean or the median). When the
    population centre is known, supply it as the second argument:

    >>> data = [2.0, 2.25, 2.5, 2.5, 3.25]  # A sample from a population
    >>> mu = 2.75                           # with a known mean.
    >>> average_deviation(data, mu)
    0.45

    When the centre is unknown, an estimate such as the sample mean or
    median can be passed instead. If ``m`` is None, or not given, the
    sample mean is calculated from the data and used:

    >>> average_deviation(data)
    0.3

    If data is an iterable of sequences, each inner sequence is treated as
    a row and the calculation is carried out column by column. Every row
    must have the same number of columns, or ValueError is raised. In the
    same way, m (if given) must be either a single number or have one item
    per column.

    >>> data = [[0, 1, 2, 4],
    ...         [1, 2, 4, 6],
    ...         [2, 4, 6, 6]]
    ...
    >>> average_deviation(data, [1, 2, 3.5, 6])  #doctest: +ELLIPSIS
    [0.666666..., 1.0, 1.5, 0.666666...]

    Raises StatsError if there are no data points to average.

    """
    if m is None:
        # Two passes are needed (one for the mean, one for the deviations),
        # so an iterator has to be saved in a list before it is consumed.
        data = data if isinstance(data, list) else list(data)
        m = stats.mean(data)

    def _abs_dev(x, centre):
        return abs(x - centre)

    deviations = (v.apply(_abs_dev, item, m) for item in data)
    n, summed = stats._len_sum(deviations)
    if n:
        return v.div(summed, n)
    raise stats.StatsError(
        'average deviation requires at least 1 data point')
Пример #3
0
def harmonic_mean(data):
    """harmonic_mean(iterable_of_numbers) -> harmonic mean of numbers
    harmonic_mean(iterable_of_rows) -> harmonic means of columns

    Return the harmonic mean of the given numbers or columns.

    The harmonic mean (also called the subcontrary mean) is the number of
    data points divided by the sum of their reciprocals, i.e. the
    reciprocal of the arithmetic mean of the reciprocals. It is the
    appropriate average for rates and speeds.

    >>> harmonic_mean([0.25, 0.5, 1.0, 1.0])
    0.5

    If data contains one or more zero values, the result is zero when all
    the zeroes have the same sign, or a NAN when their signs differ.

    When data is an iterable of sequences, each inner sequence is treated
    as a row and the harmonic mean is calculated for each column. All rows
    must have the same number of columns, or ValueError is raised.

    >>> data = [[0, 1, 2, 4],
    ...         [1, 2, 4, 8],
    ...         [2, 4, 8, 8]]
    ...
    >>> harmonic_mean(data)  #doctest: +ELLIPSIS
    [0.0, 1.71428..., 3.42857..., 6.0]

    Raises StatsError if data is empty.

    """
    # FIXME harmonic_mean([x]) ought to return x exactly, but the 1/(1/x)
    # round trip can introduce rounding error, so occasionally it does not.
    reciprocal = functools.partial(_divide, 1)
    reciprocals = (v.apply(reciprocal, item) for item in data)
    count, reciprocal_sum = stats._len_sum(reciprocals)
    if count:
        return v.div(count, reciprocal_sum)
    raise stats.StatsError(
        'harmonic mean of empty sequence is not defined')