示例#1
0
def test_value_counts_null(null_obj, index_or_series_obj):
    orig = index_or_series_obj
    obj = orig.copy()

    if not allow_na_ops(obj):
        pytest.skip("type doesn't allow for NA operations")
    elif len(obj) < 1:
        pytest.skip("Test doesn't make sense on empty data")
    elif isinstance(orig, pd.MultiIndex):
        pytest.skip(f"MultiIndex can't hold '{null_obj}'")

    values = obj.values
    if needs_i8_conversion(obj.dtype):
        values[0:2] = iNaT
    else:
        values[0:2] = null_obj

    klass = type(obj)
    repeated_values = np.repeat(values, range(1, len(values) + 1))
    obj = klass(repeated_values, dtype=obj.dtype)

    # because np.nan == np.nan is False, but None == None is True
    # np.nan would be duplicated, whereas None wouldn't
    counter = collections.Counter(obj.dropna())
    expected = Series(dict(counter.most_common()), dtype=np.int64)
    expected.index = expected.index.astype(obj.dtype)

    result = obj.value_counts()
    if obj.duplicated().any():
        # TODO:
        #  Order of entries with the same count is inconsistent on CI (gh-32449)
        expected = expected.sort_index()
        result = result.sort_index()
    tm.assert_series_equal(result, expected)

    # can't use expected[null_obj] = 3 as
    # IntervalIndex doesn't allow assignment
    new_entry = Series({np.nan: 3}, dtype=np.int64)
    expected = expected.append(new_entry)

    result = obj.value_counts(dropna=False)
    if obj.duplicated().any():
        # TODO:
        #  Order of entries with the same count is inconsistent on CI (gh-32449)
        expected = expected.sort_index()
        result = result.sort_index()
    tm.assert_series_equal(result, expected)
示例#2
0
文件: test_unique.py 项目: rth/pandas
def test_unique_null(null_obj, index_or_series_obj):
    obj = index_or_series_obj

    if not allow_na_ops(obj):
        pytest.skip("type doesn't allow for NA operations")
    elif len(obj) < 1:
        pytest.skip("Test doesn't make sense on empty data")
    elif isinstance(obj, pd.MultiIndex):
        pytest.skip(f"MultiIndex can't hold '{null_obj}'")

    values = obj.values
    if needs_i8_conversion(obj.dtype):
        values[0:2] = iNaT
    else:
        values[0:2] = null_obj

    klass = type(obj)
    repeated_values = np.repeat(values, range(1, len(values) + 1))
    obj = klass(repeated_values, dtype=obj.dtype)
    result = obj.unique()

    unique_values_raw = dict.fromkeys(obj.values)
    # because np.nan == np.nan is False, but None == None is True
    # np.nan would be duplicated, whereas None wouldn't
    unique_values_not_null = [
        val for val in unique_values_raw if not pd.isnull(val)
    ]
    unique_values = [null_obj] + unique_values_not_null

    if isinstance(obj,
                  pd.Index) and obj._is_backward_compat_public_numeric_index:
        expected = NumericIndex(unique_values, dtype=obj.dtype)
        tm.assert_index_equal(result, expected, exact=True)
    elif isinstance(obj, pd.Index):
        expected = pd.Index(unique_values, dtype=obj.dtype)
        if is_datetime64tz_dtype(obj.dtype):
            result = result.normalize()
            expected = expected.normalize()
        tm.assert_index_equal(result, expected, exact=True)
    else:
        expected = np.array(unique_values, dtype=obj.dtype)
        tm.assert_numpy_array_equal(result, expected)
示例#3
0
def test_nunique_null(null_obj, index_or_series_obj):
    obj = index_or_series_obj

    if not allow_na_ops(obj):
        pytest.skip("type doesn't allow for NA operations")
    elif isinstance(obj, pd.MultiIndex):
        pytest.skip(f"MultiIndex can't hold '{null_obj}'")

    values = obj._values
    values[0:2] = null_obj

    klass = type(obj)
    repeated_values = np.repeat(values, range(1, len(values) + 1))
    obj = klass(repeated_values, dtype=obj.dtype)

    if isinstance(obj, pd.CategoricalIndex):
        assert obj.nunique() == len(obj.categories)
        assert obj.nunique(dropna=False) == len(obj.categories) + 1
    else:
        num_unique_values = len(obj.unique())
        assert obj.nunique() == max(0, num_unique_values - 1)
        assert obj.nunique(dropna=False) == max(0, num_unique_values)