Пример #1
0
def test_multiple_aggregators_with_dict_api():
    """Expect ``Series.agg`` to reject a dict whose values are lists of reducers."""
    nested_spec = {"foo": ["min", "max"], "bar": ["sum", "mean"]}
    ser = Series(range(6), dtype="int64", name="series")

    # nested renaming
    with pytest.raises(
        SpecificationError, match="nested renamer is not supported"
    ):
        ser.agg(nested_spec)
Пример #2
0
    def test_no_args_raises(self):
        """Expect ``agg()`` with no arguments to raise, while an empty list is accepted."""
        grouped = Series([1, 2]).groupby([0, 1])

        with pytest.raises(TypeError, match="Must provide"):
            grouped.agg()

        # an empty list of functions is allowed and compared to an empty frame
        tm.assert_frame_equal(grouped.agg([]), DataFrame())
Пример #3
0
def test_demo():
    """Demonstrate ``Series.agg`` with a list of reducer names and with a dict."""
    ser = Series(range(6), dtype="int64", name="series")

    # list of reducer names -> one entry per reducer
    tm.assert_series_equal(
        ser.agg(["min", "max"]),
        Series([0, 5], index=["min", "max"], name="series"),
    )

    # dict spec -> the entry is labelled with the dict key
    tm.assert_series_equal(
        ser.agg({"foo": "min"}),
        Series([0], index=["foo"], name="series"),
    )
Пример #4
0
    def test_non_callable_aggregates(self):
        # test agg using non-callable series attributes
        s = Series([1, 2, None])

        # Calling agg w/ just a string arg same as calling s.arg
        result = s.agg("size")
        expected = s.size
        assert result == expected

        # test when mixed w/ callable reducers
        result = s.agg(["size", "count", "mean"])
        expected = Series({"size": 3.0, "count": 2.0, "mean": 1.5})
        tm.assert_series_equal(result[expected.index], expected)
Пример #5
0
    def test_non_callable_aggregates(self):
        # test agg using non-callable series attributes
        s = Series([1, 2, None])

        # Calling agg w/ just a string arg same as calling s.arg
        result = s.agg('size')
        expected = s.size
        assert result == expected

        # test when mixed w/ callable reducers
        result = s.agg(['size', 'count', 'mean'])
        expected = Series(
            OrderedDict([('size', 3.0), ('count', 2.0), ('mean', 1.5)]))
        assert_series_equal(result[expected.index], expected)
Пример #6
0
    def test_non_callable_aggregates(self):
        # test agg using non-callable series attributes
        s = Series([1, 2, None])

        # Calling agg w/ just a string arg same as calling s.arg
        result = s.agg('size')
        expected = s.size
        assert result == expected

        # test when mixed w/ callable reducers
        result = s.agg(['size', 'count', 'mean'])
        expected = Series(OrderedDict({'size': 3.0,
                                       'count': 2.0,
                                       'mean': 1.5}))
        assert_series_equal(result[expected.index], expected)
Пример #7
0
def numerical_summary(series: pd.Series) -> dict:
    """Summarize a numeric series with descriptive statistics.

    Args:
        series: series to summarize

    Returns:
        A dict holding the aggregates `mean`, `std`, `var`, `max`, `min`,
        `median`, `kurt`, `skew`, `sum` and `mad` (mean absolute deviation
        around the mean), the quantiles `quantile_5`, `quantile_25`,
        `quantile_50`, `quantile_75` and `quantile_95`, and the derived
        `iqr`, `range` and `cv` (`std / mean`, NaN when the mean is zero).

    """
    aggregates = [
        "mean",
        "std",
        "var",
        "max",
        "min",
        "median",
        "kurt",
        "skew",
        "sum",
    ]
    summary = series.agg(aggregates).to_dict()

    # `Series.mad` was removed in pandas 2.0, so "mad" can no longer be
    # requested by name through `agg`; compute it explicitly instead.
    summary["mad"] = float((series - series.mean()).abs().mean())

    quantiles = [0.05, 0.25, 0.5, 0.75, 0.95]
    for percentile, value in series.quantile(quantiles).to_dict().items():
        # round before converting: a bare int() would truncate products such
        # as 0.29 * 100 == 28.999... to the wrong percentile label
        summary["quantile_{:d}".format(int(round(percentile * 100)))] = value
    summary["iqr"] = summary["quantile_75"] - summary["quantile_25"]

    summary["range"] = summary["max"] - summary["min"]
    # `np.nan` is the spelling available in every NumPy release
    # (the `np.NaN` alias was removed in NumPy 2.0)
    summary["cv"] = summary["std"] / summary["mean"] if summary["mean"] else np.nan

    # TODO: only calculations for histogram, not the plotting
    # summary['image'] = plotting.histogram(series)
    return summary
Пример #8
0
    def test_demo(self):
        """Demonstrate ``Series.agg`` with a list, a dict, and a rejected nested dict."""
        ser = Series(range(6), dtype="int64", name="series")

        # list of reducer names -> one entry per reducer
        tm.assert_series_equal(
            ser.agg(["min", "max"]),
            Series([0, 5], index=["min", "max"], name="series"),
        )

        # dict spec -> the entry is labelled with the dict key
        tm.assert_series_equal(
            ser.agg({"foo": "min"}),
            Series([0], index=["foo"], name="series"),
        )

        # nested renaming
        with pytest.raises(
            SpecificationError, match="nested renamer is not supported"
        ):
            ser.agg({"foo": ["min", "max"]})
Пример #9
0
def test_agg_args(args, kwargs, increment):
    # GH 43357
    def f(x, a=0, b=0, c=0):
        return x + a + 10 * b + 100 * c

    s = Series([1, 2])
    result = s.agg(f, 0, *args, **kwargs)
    expected = s + increment
    tm.assert_series_equal(result, expected)
Пример #10
0
    def test_demo(self):
        """Demonstrate ``Series.agg`` with a list, a dict, and a nested dict that must warn."""
        ser = Series(range(6), dtype='int64', name='series')

        # list of reducer names -> one entry per reducer
        tm.assert_series_equal(
            ser.agg(['min', 'max']),
            Series([0, 5], index=['min', 'max'], name='series'))

        # dict spec -> the entry is labelled with the dict key
        tm.assert_series_equal(
            ser.agg({'foo': 'min'}),
            Series([0], index=['foo'], name='series'))

        # nested renaming
        with tm.assert_produces_warning(FutureWarning):
            nested = ser.agg({'foo': ['min', 'max']})

        frame = DataFrame({'foo': [0, 5]}, index=['min', 'max'])
        tm.assert_series_equal(nested, frame.unstack().rename('series'))
Пример #11
0
    def test_demo(self):
        """Demonstrate ``Series.agg`` with a list, a dict, and a nested dict that must warn."""
        ser = Series(range(6), dtype='int64', name='series')

        # list of reducer names
        by_list = ser.agg(['min', 'max'])
        tm.assert_series_equal(
            by_list, Series([0, 5], index=['min', 'max'], name='series'))

        # dict mapping a new label to a single reducer
        by_dict = ser.agg({'foo': 'min'})
        tm.assert_series_equal(
            by_dict, Series([0], index=['foo'], name='series'))

        # nested renaming
        with tm.assert_produces_warning(FutureWarning):
            by_nested = ser.agg({'foo': ['min', 'max']})

        wide = DataFrame({'foo': [0, 5]}, index=['min', 'max'])
        stacked = wide.unstack().rename('series')
        tm.assert_series_equal(by_nested, stacked)
Пример #12
0
    def test_demo(self):
        """Demonstrate ``Series.agg`` with a list, a dict, and a nested dict that must warn."""
        ser = Series(range(6), dtype="int64", name="series")

        # list of reducer names -> one entry per reducer
        wanted = Series([0, 5], index=["min", "max"], name="series")
        tm.assert_series_equal(ser.agg(["min", "max"]), wanted)

        # dict spec -> the entry is labelled with the dict key
        wanted = Series([0], index=["foo"], name="series")
        tm.assert_series_equal(ser.agg({"foo": "min"}), wanted)

        # nested renaming
        with tm.assert_produces_warning(FutureWarning):
            got = ser.agg({"foo": ["min", "max"]})

        frame = DataFrame({"foo": [0, 5]}, index=["min", "max"])
        wanted = frame.unstack().rename("series")
        tm.assert_series_equal(got, wanted)
Пример #13
0
def get_extremes(data: pd.Series) -> pd.Series:
    """Select the entries of `data` that equal its minimum or maximum.

    Args:
        data (pandas.Series): Data array.

    Returns:
        pandas.Series: The elements of `data` (original index labels kept,
        ties included) whose value is the minimum or the maximum of `data`.
    """
    # Name the reducers as strings: handing the builtins `min`/`max` to `agg`
    # is deprecated since pandas 2.1, and once the builtins are applied
    # directly they no longer skip NaN the way `Series.min`/`Series.max` do.
    extremes = data.agg(["min", "max"])
    return data[data.isin(extremes)]
Пример #14
0
def numerical_basic_summary(series: pd.Series) -> dict:
    """Compute the basic aggregates of a series.

    Args:
        series: series to summarize

    Returns:
        A dict keyed by `mean`, `std`, `var`, `min`, `max` and `sum`.

    """
    return series.agg(["mean", "std", "var", "min", "max", "sum"]).to_dict()
Пример #15
0
    def test_multiple_aggregators_with_dict_api(self):
        """Aggregate with a dict of reducer lists, expecting a FutureWarning and a stacked result."""
        ser = Series(range(6), dtype='int64', name='series')

        # nested renaming
        with tm.assert_produces_warning(FutureWarning):
            got = ser.agg({'foo': ['min', 'max'], 'bar': ['sum', 'mean']})

        # one column per dict key, one row per reducer; NaN where a reducer
        # was not requested for that key
        frame = DataFrame(
            {'foo': [5.0, np.nan, 0.0, np.nan],
             'bar': [np.nan, 2.5, np.nan, 15.0]},
            columns=['foo', 'bar'],
            index=['max', 'mean', 'min', 'sum'])
        wanted = frame.unstack().rename('series')
        tm.assert_series_equal(got.reindex_like(wanted), wanted)
Пример #16
0
def range_summary(series: pd.Series) -> dict:
    """Report the extremes of a series together with their spread.

    Args:
        series: series to summarize

    Returns:
        A dict with `min`, `max` and `range` (`max - min`).
    """
    bounds = series.agg(["min", "max"]).to_dict()
    return {**bounds, "range": bounds["max"] - bounds["min"]}
Пример #17
0
def test_numeric_only_series(arithmetic_win_operators, numeric_only, dtype):
    # GH#46560
    kernel = arithmetic_win_operators
    ser = Series([1], dtype=dtype)
    expanding = ser.expanding()
    op = getattr(expanding, kernel)
    if numeric_only and dtype is object:
        msg = f"Expanding.{kernel} does not implement numeric_only"
        with pytest.raises(NotImplementedError, match=msg):
            op(numeric_only=numeric_only)
    else:
        result = op(numeric_only=numeric_only)
        expected = ser.agg([kernel]).reset_index(drop=True).astype(float)
        tm.assert_series_equal(result, expected)
Пример #18
0
    def test_multiple_aggregators_with_dict_api(self):
        """Aggregate with a dict of reducer lists, expecting a FutureWarning and a stacked result."""
        nested_spec = {'foo': ['min', 'max'], 'bar': ['sum', 'mean']}
        ser = Series(range(6), dtype='int64', name='series')

        # nested renaming
        with tm.assert_produces_warning(FutureWarning):
            result = ser.agg(nested_spec)

        # one column per dict key, one row per reducer; NaN where a reducer
        # was not requested for that key
        wide = DataFrame(
            {'foo': [5.0, np.nan, 0.0, np.nan],
             'bar': [np.nan, 2.5, np.nan, 15.0]},
            columns=['foo', 'bar'],
            index=['max', 'mean', 'min', 'sum'])
        expected = wide.unstack().rename('series')
        tm.assert_series_equal(result.reindex_like(expected), expected)
Пример #19
0
    def test_multiple_aggregators_with_dict_api(self):
        """Aggregate with a dict of reducer lists, expecting a FutureWarning and a stacked result."""
        ser = Series(range(6), dtype="int64", name="series")

        # nested renaming
        with tm.assert_produces_warning(FutureWarning):
            got = ser.agg({"foo": ["min", "max"], "bar": ["sum", "mean"]})

        # one column per dict key, one row per reducer; NaN where a reducer
        # was not requested for that key
        cells = {
            "foo": [5.0, np.nan, 0.0, np.nan],
            "bar": [np.nan, 2.5, np.nan, 15.0],
        }
        frame = DataFrame(
            cells,
            columns=["foo", "bar"],
            index=["max", "mean", "min", "sum"],
        )
        wanted = frame.unstack().rename("series")
        tm.assert_series_equal(got.reindex_like(wanted), wanted)
Пример #20
0
def test_numeric_only_series(arithmetic_win_operators, numeric_only, dtype):
    # GH#46560
    kernel = arithmetic_win_operators
    ser = Series([1], dtype=dtype)
    ewm = ser.ewm(span=2, min_periods=1)
    op = getattr(ewm, kernel, None)
    if op is None:
        # Nothing to test
        return
    if numeric_only and dtype is object:
        msg = f"ExponentialMovingWindow.{kernel} does not implement numeric_only"
        with pytest.raises(NotImplementedError, match=msg):
            op(numeric_only=numeric_only)
    else:
        result = op(numeric_only=numeric_only)
        expected = ser.agg([kernel]).reset_index(drop=True).astype(float)
        tm.assert_series_equal(result, expected)
Пример #21
0
def bar_by_group(x: pd.Series, g: pd.Series, aggfunc="mean", *args, **kwargs):
    """Draw a bar chart of `x` aggregated within the groups of `g`.

    `g` is first replaced by the result of its `top_n(3)` method and `x` is
    grouped by that result.  When `aggfunc` is "mean", a dashed line at the
    overall mean and dotted lines at the mean +/- 2.58 standard errors are
    added to the axes.

    Args:
        x: values to aggregate.
        g: grouping labels; must provide a `top_n` method.
        aggfunc: aggregation handed to `agg`.
        *args: accepted but not used.
        **kwargs: accepted but not used.

    Returns:
        The `(fig, ax)` pair created by `plt.subplots()`.

    >>> mpg = data('mpg')
    >>> bar_by_group(mpg.hwy, mpg['class'])
    (<Figure size ... with 1 Axes>, <matplotlib.axes._subplots.AxesSubplot object at ...>)
    """
    g = g.top_n(3)
    fig, ax = plt.subplots()
    per_group = x.groupby(g).agg(aggfunc)
    per_group.plot.bar(ax=ax, color="pink", width=1)
    ax.set(title=f"{aggfunc} of {x.name} by {g.name}")

    if aggfunc != "mean":
        return fig, ax

    overall = x.agg(aggfunc)
    # 2.58 is the z value for a 99% ci
    half_width = 2.58 * (x.std() / math.sqrt(x.shape[0]))
    upper, lower = overall + half_width, overall - half_width
    ax.hlines(overall, -0.5, 3.5, ls="--", color="gray")
    ax.hlines([lower, upper], -0.5, 3.5, ls=":", color="gray")
    return fig, ax
Пример #22
0
def test_series_agg_nested_renamer():
    """Expect ``Series.agg`` to reject a dict that maps a label to a list of reducers."""
    ser = Series(range(6), dtype="int64", name="series")
    with pytest.raises(
        SpecificationError, match="nested renamer is not supported"
    ):
        ser.agg({"foo": ["min", "max"]})
Пример #23
0
 def test_mangle_series_groupby(self):
     gr = Series([1, 2, 3, 4]).groupby([0, 0, 1, 1])
     result = gr.agg([lambda x: 0, lambda x: 1])
     expected = DataFrame({"<lambda_0>": [0, 0], "<lambda_1>": [1, 1]})
     tm.assert_frame_equal(result, expected)
Пример #24
0
 def test_mangled(self):
     gr = Series([1, 2, 3]).groupby([0, 0, 1])
     result = gr.agg(a=lambda x: 0, b=lambda x: 1)
     expected = DataFrame({"a": [0, 0], "b": [1, 1]})
     tm.assert_frame_equal(result, expected)
Пример #25
0
 def test_series_named_agg_duplicates_no_raises(self):
     # GH28426
     gr = Series([1, 2, 3]).groupby([0, 0, 1])
     grouped = gr.agg(a="sum", b="sum")
     expected = DataFrame({"a": [3, 3], "b": [3, 3]})
     tm.assert_frame_equal(expected, grouped)
Пример #26
0
def test_series_nested_renamer(renamer):
    """Expect ``Series.agg`` to raise for the supplied ``renamer`` specification."""
    ser = Series(range(6), dtype="int64", name="series")
    with pytest.raises(
        SpecificationError, match="nested renamer is not supported"
    ):
        ser.agg(renamer)
Пример #27
0
# NOTE(review): `ten` and `nums` are created earlier in this script, outside
# this excerpt; presumably both are pandas Series -- confirm against the
# preceding lines.

# Show the Series, its raw values and its index labels
print(ten)
print(ten.values)
print("Indexes:")
print(list(ten.index))

# Extract numbers by condition (boolean mask keeps the entries greater than 0)
pos_nums = nums[nums > 0]
print(f"There are {len(pos_nums)} positive numbers")

# You can plot a Series with matplotlib
_, axis = plt.subplots()
axis.plot(pos_nums)
plt.show()

# Find aggregate values (functions that work on sequences)
print(nums.agg(['min', 'max',
                'sum']))  # Quoted values name the aggregation functions to apply
print(ten.min(), ten.max())
# sep='' stops print from inserting a space after the newline in the label
print('3 largest:\n', ten.nlargest(3), sep='')
print('3 smallest:\n', ten.nsmallest(3), sep='')

# Map: apply a function to each element
print("Adding 50 to ten numbers:")
print(ten.map(lambda x: x + 50))
print(ten.map('I am {}'.format))  # a bound str.format works as the mapper
print(ten.transform([np.sqrt, np.exp]))  # transform accepts a list of functions

# Use apply for functions that take more than one parameter
# (the additional arguments are supplied through `args`)
print("Cubing ten numbers:")
cubes = ten.apply(operator.pow, args=(3, ))
print(cubes)
cubes2 = ten.pow(3)  # Series has a pow method as well