Exemplo n.º 1
0
class TestSeriesMap(TestData):
    def test_map(self):
        """Check Series.map with Series, dict and callable mappers.

        Also covers GH 10324: mapping through a Series whose values and/or
        index are categorical.
        """
        _, mixed = tm.getMixedTypeDict()

        lookup = Series(mixed['B'], index=mixed['C'])
        keys = Series(mixed['C'][:4], index=mixed['D'][:4])

        # the mapper may be a Series or the equivalent plain dict
        for mapper in (lookup, lookup.to_dict()):
            mapped = keys.map(mapper)
            for label, value in compat.iteritems(mapped):
                assert value == lookup[keys[label]]

        # function
        doubled = self.ts.map(lambda x: x * 2)
        tm.assert_series_equal(doubled, self.ts * 2)

        # GH 10324
        numbers = Series([1, 2, 3, 4])
        parity_cat = Series(["even", "odd", "even", "odd"], dtype="category")
        parity_obj = Series(["even", "odd", "even", "odd"])

        tm.assert_series_equal(
            numbers.map(parity_cat),
            Series(["odd", "even", "odd", np.nan], dtype="category"))
        tm.assert_series_equal(
            numbers.map(parity_obj),
            Series(["odd", "even", "odd", np.nan]))

        letters = Series(['a', 'b', 'c', 'd'])
        ints_by_cat = Series([1, 2, 3, 4],
                             index=pd.CategoricalIndex(['b', 'c', 'd', 'e']))
        ints_by_obj = Series([1, 2, 3, 4], index=Index(['b', 'c', 'd', 'e']))

        # 'a' is absent from both mappers, so the first result is NaN
        for mapper in (ints_by_cat, ints_by_obj):
            tm.assert_series_equal(letters.map(mapper),
                                   Series([np.nan, 1, 2, 3]))

        letters = Series(['a', 'b', 'c', 'd'])
        upper_cat = Series(['B', 'C', 'D', 'E'],
                           dtype='category',
                           index=pd.CategoricalIndex(['b', 'c', 'd', 'e']))
        upper_obj = Series(['B', 'C', 'D', 'E'],
                           index=Index(['b', 'c', 'd', 'e']))

        # a categorical mapper keeps its full set of categories in the result
        expected_cat = Series(
            pd.Categorical([np.nan, 'B', 'C', 'D'],
                           categories=['B', 'C', 'D', 'E']))
        tm.assert_series_equal(letters.map(upper_cat), expected_cat)

        expected_obj = Series([np.nan, 'B', 'C', 'D'])
        tm.assert_series_equal(letters.map(upper_obj), expected_obj)

    @pytest.mark.parametrize("index", tm.all_index_generator(10))
    def test_map_empty(self, index):
        """Mapping through an empty dict gives all-NaN on the same index."""
        ser = Series(index)
        all_missing = pd.Series(np.nan, index=ser.index)

        tm.assert_series_equal(ser.map({}), all_missing)

    def test_map_compat(self):
        """Map a boolean Series through a dict keyed by True/False."""
        # related GH 8024
        flags = Series([True, True, False], index=[1, 2, 3])
        labels = {True: 'foo', False: 'bar'}

        mapped = flags.map(labels)

        assert_series_equal(mapped,
                            Series(['foo', 'foo', 'bar'], index=[1, 2, 3]))

    def test_map_int(self):
        """Map a float Series through an integer Series with a missing key.

        ``left`` holds 4 under 'd', which is not in ``right``'s index, so the
        mapped result has a missing value there and must be float dtype.
        """
        left = Series({'a': 1., 'b': 2., 'c': 3., 'd': 4})
        right = Series({1: 11, 2: 22, 3: 33})

        # np.float64 rather than the np.float_ alias, which NumPy 2.0 removed;
        # both name the same scalar type on older NumPy versions.
        assert left.dtype == np.float64
        assert issubclass(right.dtype.type, np.integer)

        merged = left.map(right)
        assert merged.dtype == np.float64
        assert isna(merged['d'])
        assert not isna(merged['c'])

    def test_map_type_inference(self):
        """A mapper returning 0-d integer arrays still yields an integer dtype."""
        def zero_or_one(x):
            return np.where(x == 0, 0, 1)

        mapped = Series(lrange(3)).map(zero_or_one)
        assert issubclass(mapped.dtype.type, np.integer)

    def test_map_decimal(self):
        """Mapping values to Decimal gives an object Series of Decimals."""
        from decimal import Decimal

        def to_decimal(x):
            return Decimal(str(x))

        as_decimal = self.series.map(to_decimal)

        assert as_decimal.dtype == np.object_
        assert isinstance(as_decimal[0], Decimal)

    def test_map_na_exclusion(self):
        """With na_action='ignore', mapping x * 2 matches plain ``s * 2``."""
        values = Series([1.5, np.nan, 3, np.nan, 5])
        expected = values * 2

        doubled = values.map(lambda x: x * 2, na_action='ignore')

        assert_series_equal(doubled, expected)

    def test_map_dict_with_tuple_keys(self):
        """
        Map tuple values through a dict with tuple keys.

        Due to new MultiIndex-ing behaviour in v0.14.0, dicts with tuple
        keys passed to map were being converted to a multi-index, which
        prevented tuple values from being mapped properly.
        """
        # GH 18496
        frame = pd.DataFrame({'a': [(1, ), (2, ), (3, 4), (5, 6)]})
        tuple_to_label = {
            (1, ): 'A',
            (2, ): 'B',
            (3, 4): 'A',
            (5, 6): 'B',
        }

        frame['labels'] = frame['a'].map(tuple_to_label)
        frame['expected_labels'] = pd.Series(['A', 'B', 'A', 'B'],
                                             index=frame.index)

        # All labels should be filled now
        actual = frame['labels']
        wanted = frame['expected_labels']
        tm.assert_series_equal(actual, wanted, check_names=False)

    def test_map_counter(self):
        """A Counter mapper supplies 0 for keys it has never seen."""
        letters = Series(['a', 'b', 'c'], index=[1, 2, 3])
        counts = Counter({'b': 5, 'c': 1})

        mapped = letters.map(counts)

        assert_series_equal(mapped, Series([0, 5, 1], index=[1, 2, 3]))

    def test_map_defaultdict(self):
        """A defaultdict mapper uses its factory value for missing keys."""
        numbers = Series([1, 2, 3], index=['a', 'b', 'c'])
        fallback = defaultdict(lambda: 'blank', {1: 'stuff'})

        mapped = numbers.map(fallback)

        wanted = Series(['stuff', 'blank', 'blank'], index=['a', 'b', 'c'])
        assert_series_equal(mapped, wanted)

    def test_map_dict_subclass_with_missing(self):
        """
        Test Series.map with a dictionary subclass that defines __missing__,
        i.e. sets a default value (GH #15999).
        """
        class DictWithMissing(dict):
            def __missing__(self, key):
                return 'missing'

        mapper = DictWithMissing({3: 'three'})

        mapped = Series([1, 2, 3]).map(mapper)

        # keys 1 and 2 fall through to __missing__
        assert_series_equal(mapped, Series(['missing', 'missing', 'three']))

    def test_map_dict_subclass_without_missing(self):
        """A dict subclass without __missing__ maps unknown keys to NaN."""
        class DictWithoutMissing(dict):
            pass

        mapper = DictWithoutMissing({3: 'three'})

        mapped = Series([1, 2, 3]).map(mapper)

        assert_series_equal(mapped, Series([np.nan, np.nan, 'three']))

    def test_map_box(self):
        """Check the scalar type handed to the mapper for datetime-like data.

        The mapper formats each element's class name, so the expected strings
        pin down which scalar class Series.map passes in.
        """
        def name_day_tz(x):
            return '{0}_{1}_{2}'.format(x.__class__.__name__, x.day, x.tz)

        def name_days(x):
            return '{0}_{1}'.format(x.__class__.__name__, x.days)

        def name_freq(x):
            return '{0}_{1}'.format(x.__class__.__name__, x.freqstr)

        # naive datetimes: boxed value must be Timestamp instance
        naive = pd.Series(
            [pd.Timestamp('2011-01-01'), pd.Timestamp('2011-01-02')])
        assert naive.dtype == 'datetime64[ns]'
        tm.assert_series_equal(
            naive.map(name_day_tz),
            pd.Series(['Timestamp_1_None', 'Timestamp_2_None']))

        # tz-aware datetimes
        aware = pd.Series([
            pd.Timestamp('2011-01-01', tz='US/Eastern'),
            pd.Timestamp('2011-01-02', tz='US/Eastern'),
        ])
        assert aware.dtype == 'datetime64[ns, US/Eastern]'
        tm.assert_series_equal(
            aware.map(name_day_tz),
            pd.Series(['Timestamp_1_US/Eastern', 'Timestamp_2_US/Eastern']))

        # timedelta
        deltas = pd.Series([pd.Timedelta('1 days'), pd.Timedelta('2 days')])
        assert deltas.dtype == 'timedelta64[ns]'
        tm.assert_series_equal(
            deltas.map(name_days),
            pd.Series(['Timedelta_1', 'Timedelta_2']))

        # period (object dtype, not boxed)
        periods = pd.Series([
            pd.Period('2011-01-01', freq='M'),
            pd.Period('2011-01-02', freq='M'),
        ])
        assert periods.dtype == 'object'
        tm.assert_series_equal(
            periods.map(name_freq),
            pd.Series(['Period_M', 'Period_M']))

    def test_map_categorical(self):
        """Check Series.map on an ordered categorical Series.

        A one-to-one mapper keeps the result categorical (with mapped
        categories, still ordered); a many-to-one mapper yields object dtype;
        ``na_action='ignore'`` raises NotImplementedError.
        """
        values = pd.Categorical(list('ABBABCD'),
                                categories=list('DCBA'),
                                ordered=True)
        s = pd.Series(values, name='XX', index=list('abcdefg'))

        result = s.map(lambda x: x.lower())
        exp_values = pd.Categorical(list('abbabcd'),
                                    categories=list('dcba'),
                                    ordered=True)
        exp = pd.Series(exp_values, name='XX', index=list('abcdefg'))
        tm.assert_series_equal(result, exp)
        tm.assert_categorical_equal(result.values, exp_values)

        result = s.map(lambda x: 'A')
        exp = pd.Series(['A'] * 7, name='XX', index=list('abcdefg'))
        tm.assert_series_equal(result, exp)
        # np.object_ rather than the np.object alias: the alias was
        # deprecated in NumPy 1.20 and removed in 1.24.
        assert result.dtype == np.object_

        with pytest.raises(NotImplementedError):
            s.map(lambda x: x, na_action='ignore')

    def test_map_datetimetz(self):
        """Check Series.map on tz-aware datetime data."""
        def tokyo_hours(start, end):
            hourly = pd.date_range(start, end, freq='H')
            return hourly.tz_localize('Asia/Tokyo')

        stamps = pd.Series(tokyo_hours('2011-01-01', '2011-01-02'), name='XX')

        # keep tz
        shifted = stamps.map(lambda x: x + pd.offsets.Day())
        next_day = pd.Series(tokyo_hours('2011-01-02', '2011-01-03'),
                             name='XX')
        tm.assert_series_equal(shifted, next_day)

        # change dtype
        # GH 14506 : Returned dtype changed from int32 to int64
        hours = stamps.map(lambda x: x.hour)
        wanted_hours = list(range(24))
        wanted_hours.append(0)
        tm.assert_series_equal(
            hours, pd.Series(wanted_hours, name='XX', dtype=np.int64))

        with pytest.raises(NotImplementedError):
            stamps.map(lambda x: x, na_action='ignore')

        # not vectorized: the mapper only accepts individual Timestamps
        def tz_name(x):
            if isinstance(x, pd.Timestamp):
                return str(x.tz)
            raise ValueError

        zones = stamps.map(tz_name)
        tm.assert_series_equal(zones,
                               pd.Series(['Asia/Tokyo'] * 25, name='XX'))

    @pytest.mark.parametrize(
        "vals,mapping,exp",
        [
            (list('abc'), {np.nan: 'not NaN'}, [np.nan] * 3 + ['not NaN']),
            (list('abc'), {'a': 'a letter'}, ['a letter'] + [np.nan] * 3),
            (list(range(3)), {0: 42}, [42] + [np.nan] * 3),
        ])
    def test_map_missing_mixed(self, vals, mapping, exp):
        """Map values followed by a trailing NaN through a partial dict."""
        # GH20495
        with_nan = pd.Series(vals + [np.nan])
        wanted = pd.Series(exp)

        tm.assert_series_equal(with_nan.map(mapping), wanted)
Exemplo n.º 2
0
class TestFancy(Base):
    """ pure get/set item & fancy indexing """

    def test_setitem_ndarray_1d(self):
        """Assigning a 1-d ndarray must match the length of the indexer.

        A 4-element array into a 3-label ``.loc`` selection raises
        ValueError; into a 4-label selection it is stored as given. A
        3-element array assigned to a row slice of a two-column frame also
        raises ValueError.
        """
        # GH5508

        # len of indexer vs length of the 1d ndarray
        # np.complex128 rather than the np.complex alias: the alias was
        # deprecated in NumPy 1.20 and removed in 1.24; the dtype is the same.
        df = DataFrame(index=Index(np.arange(1, 11)))
        df["foo"] = np.zeros(10, dtype=np.float64)
        df["bar"] = np.zeros(10, dtype=np.complex128)

        # invalid
        with pytest.raises(ValueError):
            df.loc[df.index[2:5], "bar"] = np.array([2.33j, 1.23 + 0.1j, 2.2, 1.0])

        # valid
        df.loc[df.index[2:6], "bar"] = np.array([2.33j, 1.23 + 0.1j, 2.2, 1.0])

        result = df.loc[df.index[2:6], "bar"]
        expected = Series(
            [2.33j, 1.23 + 0.1j, 2.2, 1.0], index=[3, 4, 5, 6], name="bar"
        )
        tm.assert_series_equal(result, expected)

        # dtype getting changed?
        df = DataFrame(index=Index(np.arange(1, 11)))
        df["foo"] = np.zeros(10, dtype=np.float64)
        df["bar"] = np.zeros(10, dtype=np.complex128)

        with pytest.raises(ValueError):
            df[2:5] = np.arange(1, 4) * 1j

    @pytest.mark.parametrize(
        "index", tm.all_index_generator(5), ids=lambda x: type(x).__name__
    )
    @pytest.mark.parametrize(
        "obj",
        [
            lambda i: Series(np.arange(len(i)), index=i),
            lambda i: DataFrame(np.random.randn(len(i), len(i)), index=i, columns=i),
        ],
        ids=["Series", "DataFrame"],
    )
    @pytest.mark.parametrize(
        "idxr, idxr_id",
        [
            (lambda x: x, "getitem"),
            (lambda x: x.loc, "loc"),
            (lambda x: x.iloc, "iloc"),
            pytest.param(lambda x: x.ix, "ix", marks=ignore_ix),
        ],
    )
    def test_getitem_ndarray_3d(self, index, obj, idxr, idxr_id):
        """Index a Series/DataFrame with a 3-d integer ndarray.

        Parameters
        ----------
        index : Index
            Index used for the object's rows (and columns for DataFrame).
        obj : callable
            Factory building a Series or DataFrame from ``index``.
        idxr : callable
            Returns the indexer to exercise (the object itself, ``.loc``,
            ``.iloc`` or ``.ix``) for the built object.
        idxr_id : str
            Name of that indexer; used to select the expected outcome.
        """
        # GH 25567
        obj = obj(index)
        idxr = idxr(obj)
        nd3 = np.random.randint(5, size=(2, 2, 2))

        # Regex alternation of every error message accepted below; the
        # adjacent string literals concatenate into one pattern.
        msg = (
            r"Buffer has wrong number of dimensions \(expected 1,"
            r" got 3\)|"
            "The truth value of an array with more than one element is"
            " ambiguous|"
            "Cannot index with multidimensional key|"
            r"Wrong number of dimensions. values.ndim != ndim \[3 != 1\]|"
            "No matching signature found|"  # TypeError
            "unhashable type: 'numpy.ndarray'"  # TypeError
        )

        # Series.__getitem__ on these index kinds is expected not to raise,
        # so the lookup is simply executed.
        if (
            isinstance(obj, Series)
            and idxr_id == "getitem"
            and index.inferred_type
            in [
                "string",
                "datetime64",
                "period",
                "timedelta64",
                "boolean",
                "categorical",
            ]
        ):
            idxr[nd3]
        else:
            # Every other combination must raise; only the exception type
            # varies with the object / indexer / index kind.
            if (
                isinstance(obj, DataFrame)
                and idxr_id == "getitem"
                and index.inferred_type == "boolean"
            ):
                error = TypeError
            elif idxr_id == "getitem" and index.inferred_type == "interval":
                error = TypeError
            else:
                error = ValueError

            with pytest.raises(error, match=msg):
                idxr[nd3]

    @pytest.mark.parametrize(
        "index", tm.all_index_generator(5), ids=lambda x: type(x).__name__
    )
    @pytest.mark.parametrize(
        "obj",
        [
            lambda i: Series(np.arange(len(i)), index=i),
            lambda i: DataFrame(np.random.randn(len(i), len(i)), index=i, columns=i),
        ],
        ids=["Series", "DataFrame"],
    )
    @pytest.mark.parametrize(
        "idxr, idxr_id",
        [
            (lambda x: x, "setitem"),
            (lambda x: x.loc, "loc"),
            (lambda x: x.iloc, "iloc"),
            pytest.param(lambda x: x.ix, "ix", marks=ignore_ix),
        ],
    )
    def test_setitem_ndarray_3d(self, index, obj, idxr, idxr_id):
        """Assign through a 3-d integer ndarray key on a Series/DataFrame.

        Parameters
        ----------
        index : Index
            Index used for the object's rows (and columns for DataFrame).
        obj : callable
            Factory building a Series or DataFrame from ``index``.
        idxr : callable
            Returns the indexer to exercise (the object itself, ``.loc``,
            ``.iloc`` or ``.ix``) for the built object.
        idxr_id : str
            Name of that indexer; used to select the expected outcome.
        """
        # GH 25567
        obj = obj(index)
        idxr = idxr(obj)
        nd3 = np.random.randint(5, size=(2, 2, 2))

        # Regex alternation of every error message accepted below; the
        # adjacent string literals concatenate into one pattern.
        msg = (
            r"Buffer has wrong number of dimensions \(expected 1,"
            r" got 3\)|"
            "The truth value of an array with more than one element is"
            " ambiguous|"
            "Only 1-dimensional input arrays are supported|"
            "'pandas._libs.interval.IntervalTree' object has no attribute"
            " 'set_value'|"  # AttributeError
            "unhashable type: 'numpy.ndarray'|"  # TypeError
            "No matching signature found|"  # TypeError
            r"^\[\[\["  # pandas.core.indexing.IndexingError
        )

        # Combinations expected NOT to raise: any .iloc assignment, plain
        # Series setitem on the listed index kinds, and .ix on a subset.
        if (
            (idxr_id == "iloc")
            or (
                (
                    isinstance(obj, Series)
                    and idxr_id == "setitem"
                    and index.inferred_type
                    in [
                        "floating",
                        "string",
                        "datetime64",
                        "period",
                        "timedelta64",
                        "boolean",
                        "categorical",
                    ]
                )
            )
            or (
                idxr_id == "ix"
                and index.inferred_type in ["string", "datetime64", "period", "boolean"]
            )
        ):
            idxr[nd3] = 0
        else:
            # Everything else must raise one of these exception types with a
            # message matching ``msg``.
            with pytest.raises(
                (ValueError, AttributeError, TypeError, pd.core.indexing.IndexingError),
                match=msg,
            ):
                idxr[nd3] = 0

    def test_inf_upcast(self):
        """np.inf is usable as a label and converts the axis to float."""
        # GH 16957
        # We should be able to use np.inf as a key
        # np.inf should cause an index to convert to float

        # Test with np.inf in rows
        by_row = DataFrame(columns=[0])
        for label, value in [(1, 1), (2, 2), (np.inf, 3)]:
            by_row.loc[label] = value

        # make sure we can look up the value
        assert by_row.loc[np.inf, 0] == 3

        tm.assert_index_equal(by_row.index,
                              pd.Float64Index([1, 2, np.inf]))

        # Test with np.inf in columns
        by_col = DataFrame()
        for row, col, value in [(0, 0, 1), (1, 1, 2), (0, np.inf, 3)]:
            by_col.loc[row, col] = value

        tm.assert_index_equal(by_col.columns,
                              pd.Float64Index([0, 1, np.inf]))

    def test_setitem_dtype_upcast(self):
        """Setting an incompatible scalar upcasts only the touched column."""
        def ab_frame(values):
            # 2x3 frame with the row/column labels used by the GH10280 cases
            return DataFrame(values, index=list("ab"),
                             columns=["foo", "bar", "baz"])

        # GH3216
        frame = DataFrame([{"a": 1}, {"a": 3, "b": 2}])
        frame["c"] = np.nan
        assert frame["c"].dtype == np.float64

        frame.loc[0, "c"] = "foo"
        wanted = DataFrame(
            [{"a": 1, "c": "foo"}, {"a": 3, "b": 2, "c": np.nan}])
        tm.assert_frame_equal(frame, wanted)

        # GH10280
        int_frame = ab_frame(np.arange(6, dtype="int64").reshape(2, 3))

        for val in [3.14, "wxyz"]:
            left = int_frame.copy()
            left.loc["a", "bar"] = val
            right = ab_frame([[0, val, 2], [3, 4, 5]])

            tm.assert_frame_equal(left, right)
            for untouched in ("foo", "baz"):
                assert is_integer_dtype(left[untouched])

        left = ab_frame(np.arange(6, dtype="int64").reshape(2, 3) / 10.0)
        left.loc["a", "bar"] = "wxyz"

        right = ab_frame([[0, "wxyz", 0.2], [0.3, 0.4, 0.5]])

        tm.assert_frame_equal(left, right)
        for untouched in ("foo", "baz"):
            assert is_float_dtype(left[untouched])

    def test_dups_fancy_indexing(self):
        """List-based selection on frames with duplicate labels.

        Covers duplicate column names, duplicate row labels selected in a
        different order, and list indexers with labels missing from the
        index. The partly-missing selections are asserted to emit a
        FutureWarning; a list of only missing labels must raise KeyError.
        """

        # GH 3455
        from pandas.util.testing import makeCustomDataframe as mkdf

        df = mkdf(10, 3)
        df.columns = ["a", "a", "b"]
        result = df[["b", "a"]].columns
        expected = Index(["b", "a", "a"])
        tm.assert_index_equal(result, expected)

        # across dtypes
        df = DataFrame([[1, 2, 1.0, 2.0, 3.0, "foo", "bar"]], columns=list("aaaaaaa"))
        # results discarded: these calls only need to run without raising
        df.head()
        str(df)
        result = DataFrame([[1, 2, 1.0, 2.0, 3.0, "foo", "bar"]])
        result.columns = list("aaaaaaa")

        # TODO(wesm): unused?
        df_v = df.iloc[:, 4]  # noqa
        res_v = result.iloc[:, 4]  # noqa

        tm.assert_frame_equal(df, result)

        # GH 3561, dups not in selected order
        df = DataFrame(
            {"test": [5, 7, 9, 11], "test1": [4.0, 5, 6, 7], "other": list("abcd")},
            index=["A", "A", "B", "C"],
        )
        rows = ["C", "B"]
        expected = DataFrame(
            {"test": [11, 9], "test1": [7.0, 6], "other": ["d", "c"]}, index=rows
        )
        result = df.loc[rows]
        tm.assert_frame_equal(result, expected)

        result = df.loc[Index(rows)]
        tm.assert_frame_equal(result, expected)

        # "E" is not in the index: its row is expected to be all-NaN
        rows = ["C", "B", "E"]
        expected = DataFrame(
            {
                "test": [11, 9, np.nan],
                "test1": [7.0, 6, np.nan],
                "other": ["d", "c", np.nan],
            },
            index=rows,
        )

        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            result = df.loc[rows]
        tm.assert_frame_equal(result, expected)

        # see GH5553, make sure we use the right indexer
        rows = ["F", "G", "H", "C", "B", "E"]
        expected = DataFrame(
            {
                "test": [np.nan, np.nan, np.nan, 11, 9, np.nan],
                "test1": [np.nan, np.nan, np.nan, 7.0, 6, np.nan],
                "other": [np.nan, np.nan, np.nan, "d", "c", np.nan],
            },
            index=rows,
        )
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            result = df.loc[rows]
        tm.assert_frame_equal(result, expected)

        # List containing only missing label
        dfnu = DataFrame(np.random.randn(5, 3), index=list("AABCD"))
        with pytest.raises(
            KeyError,
            match=re.escape(
                "\"None of [Index(['E'], dtype='object')] are in the [index]\""
            ),
        ):
            dfnu.loc[["E"]]

        # ToDo: check_index_type can be True after GH 11497

        # GH 4619; duplicate indexer with missing label
        df = DataFrame({"A": [0, 1, 2]})
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            result = df.loc[[0, 8, 0]]
        expected = DataFrame({"A": [0, np.nan, 0]}, index=[0, 8, 0])
        tm.assert_frame_equal(result, expected, check_index_type=False)

        df = DataFrame({"A": list("abc")})
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            result = df.loc[[0, 8, 0]]
        expected = DataFrame({"A": ["a", np.nan, "a"]}, index=[0, 8, 0])
        tm.assert_frame_equal(result, expected, check_index_type=False)

        # non unique with non unique selector
        df = DataFrame({"test": [5, 7, 9, 11]}, index=["A", "A", "B", "C"])
        # each requested "A" expands to both "A" rows, hence four "A" entries
        expected = DataFrame(
            {"test": [5, 7, 5, 7, np.nan]}, index=["A", "A", "A", "A", "E"]
        )
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            result = df.loc[["A", "A", "E"]]
        tm.assert_frame_equal(result, expected)

    def test_dups_fancy_indexing2(self):
        """Label selection with duplicate labels on either axis."""
        # GH 5835
        # dups on index and missing values
        frame = DataFrame(np.random.randn(5, 5),
                          columns=["A", "B", "B", "B", "A"])

        # the missing column "C" is expected to come back as all-NaN
        present = frame.loc[:, ["A", "B"]]
        absent = DataFrame(np.nan, columns=["C"], index=frame.index)
        wanted = pd.concat([present, absent], axis=1)
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            selected = frame.loc[:, ["A", "B", "C"]]
        tm.assert_frame_equal(selected, wanted)

        # GH 6504, multi-axis indexing
        frame = DataFrame(
            np.random.randn(9, 2),
            index=[1, 1, 1, 2, 2, 2, 3, 3, 3],
            columns=["a", "b"],
        )

        tm.assert_frame_equal(frame.loc[[1, 2]], frame.iloc[0:6])

        tm.assert_frame_equal(frame.loc[:, ["a", "b"]], frame)

        tm.assert_frame_equal(frame.loc[[1, 2], ["a", "b"]],
                              frame.iloc[0:6, :])

    @pytest.mark.parametrize("case", [lambda s: s, lambda s: s.loc])
    def test_duplicate_int_indexing(self, case):
        """A one-element list key on a duplicated int label matches ``s[1]``."""
        # GH 17347
        ser = pd.Series(range(3), index=[1, 1, 3])
        wanted = ser[1]

        indexer = case(ser)
        tm.assert_series_equal(indexer[[1]], wanted)

    def test_indexing_mixed_frame_bug(self):
        """Boolean-mask ``.loc`` assignment must update a mixed-dtype frame.

        Regression test for GH3492. As originally reported, the assignment
        through ``df.loc[idx, 'test']`` was not reflected in the frame:
        element [0, 2] still showed '_' via ``df`` and ``df.iloc[0, 2]``,
        while ``df.ix[idx, 'test']`` showed '-----'.
        """
        def replace_aaa(replacement):
            return lambda x: replacement if x == "aaa" else x

        # GH3492
        frame = DataFrame(
            {
                "a": {1: "aaa", 2: "bbb", 3: "ccc"},
                "b": {1: 111, 2: 222, 3: 333},
            }
        )

        # creating the new column works
        frame["test"] = frame["a"].apply(replace_aaa("_"))

        # overwriting it through a boolean mask is what used to fail
        mask = frame["test"] == "_"
        replacement = frame.loc[mask, "a"].apply(replace_aaa("-----"))
        frame.loc[mask, "test"] = replacement

        assert frame.iloc[0, 2] == "-----"

    def test_multitype_list_index_access(self):
        """A list key with missing labels on mixed-type columns raises KeyError."""
        # GH 10610
        mixed_columns = ["a"] + [20, 21, 22, 23]
        frame = DataFrame(np.random.random((10, 5)), columns=mixed_columns)

        expected_msg = re.escape("'[-8, 26] not in index'")
        with pytest.raises(KeyError, match=expected_msg):
            frame[[22, 26, -8]]

        n_rows = frame.shape[0]
        assert frame[21].shape[0] == n_rows

    def test_set_index_nan(self):
        """Round-trip set_index/reset_index when a key column contains NaN.

        The "QC" column is mostly NaN; setting it (with "year" and "PRuid")
        as the index and resetting again must reproduce the original frame
        once the columns are put back in their original order.
        """

        # GH 3586
        df = DataFrame(
            {
                "PRuid": {
                    17: "nonQC",
                    18: "nonQC",
                    19: "nonQC",
                    20: "10",
                    21: "11",
                    22: "12",
                    23: "13",
                    24: "24",
                    25: "35",
                    26: "46",
                    27: "47",
                    28: "48",
                    29: "59",
                    30: "10",
                },
                "QC": {
                    17: 0.0,
                    18: 0.0,
                    19: 0.0,
                    20: np.nan,
                    21: np.nan,
                    22: np.nan,
                    23: np.nan,
                    24: 1.0,
                    25: np.nan,
                    26: np.nan,
                    27: np.nan,
                    28: np.nan,
                    29: np.nan,
                    30: np.nan,
                },
                "data": {
                    17: 7.9544899999999998,
                    18: 8.0142609999999994,
                    19: 7.8591520000000008,
                    20: 0.86140349999999999,
                    21: 0.87853110000000001,
                    22: 0.8427041999999999,
                    23: 0.78587700000000005,
                    24: 0.73062459999999996,
                    25: 0.81668560000000001,
                    26: 0.81927080000000008,
                    27: 0.80705009999999999,
                    28: 0.81440240000000008,
                    29: 0.80140849999999997,
                    30: 0.81307740000000006,
                },
                "year": {
                    17: 2006,
                    18: 2007,
                    19: 2008,
                    20: 1985,
                    21: 1985,
                    22: 1985,
                    23: 1985,
                    24: 1985,
                    25: 1985,
                    26: 1985,
                    27: 1985,
                    28: 1985,
                    29: 1985,
                    30: 1986,
                },
            }
        ).reset_index()

        # reindex(columns=...) restores the original column order, which
        # set_index/reset_index changes by moving the key columns first
        result = (
            df.set_index(["year", "PRuid", "QC"])
            .reset_index()
            .reindex(columns=df.columns)
        )
        tm.assert_frame_equal(result, df)

    def test_multi_assign(self):
        """Assign to several columns at once through a boolean row mask.

        Exercises a DataFrame right-hand side, an ndarray right-hand side,
        and a Series right-hand side that is assigned to two columns. Each
        of the first two assignments is performed twice and checked after
        each, so repeating it must not change the result.
        """

        # GH 3626, an assignment of a sub-df to a df
        df = DataFrame(
            {
                "FC": ["a", "b", "a", "b", "a", "b"],
                "PF": [0, 0, 0, 0, 1, 1],
                "col1": list(range(6)),
                "col2": list(range(6, 12)),
            }
        )
        df.iloc[1, 0] = np.nan
        df2 = df.copy()

        # rows whose "FC" is not missing, i.e. every row except row 1
        mask = ~df2.FC.isna()
        cols = ["col1", "col2"]

        dft = df2 * 2
        dft.iloc[3, 3] = np.nan

        expected = DataFrame(
            {
                "FC": ["a", np.nan, "a", "b", "a", "b"],
                "PF": [0, 0, 0, 0, 1, 1],
                "col1": Series([0, 1, 4, 6, 8, 10]),
                "col2": [12, 7, 16, np.nan, 20, 22],
            }
        )

        # frame on rhs
        df2.loc[mask, cols] = dft.loc[mask, cols]
        tm.assert_frame_equal(df2, expected)

        df2.loc[mask, cols] = dft.loc[mask, cols]
        tm.assert_frame_equal(df2, expected)

        # with an ndarray on rhs
        # coerces to float64 because values has float64 dtype
        # GH 14001
        expected = DataFrame(
            {
                "FC": ["a", np.nan, "a", "b", "a", "b"],
                "PF": [0, 0, 0, 0, 1, 1],
                "col1": [0.0, 1.0, 4.0, 6.0, 8.0, 10.0],
                "col2": [12, 7, 16, np.nan, 20, 22],
            }
        )
        df2 = df.copy()
        df2.loc[mask, cols] = dft.loc[mask, cols].values
        tm.assert_frame_equal(df2, expected)
        df2.loc[mask, cols] = dft.loc[mask, cols].values
        tm.assert_frame_equal(df2, expected)

        # broadcasting on the rhs is required
        df = DataFrame(
            dict(
                A=[1, 2, 0, 0, 0],
                B=[0, 0, 0, 10, 11],
                C=[0, 0, 0, 10, 11],
                D=[3, 4, 5, 6, 7],
            )
        )

        # build the expectation one column at a time ...
        expected = df.copy()
        mask = expected["A"] == 0
        for col in ["A", "B"]:
            expected.loc[mask, col] = df["D"]

        # ... and compare with assigning the Series to both columns at once
        df.loc[df["A"] == 0, ["A", "B"]] = df["D"]
        tm.assert_frame_equal(df, expected)

    def test_setitem_list(self):
        """Store a list or an arbitrary object in a single cell via ``.ix``.

        In each scenario a cell is assigned twice and the frame is compared
        with one that only received the final value (or, in the last
        scenario, with a frame that was never assigned to).
        """

        # GH 6043
        # ix with a list
        df = DataFrame(index=[0, 1], columns=[0])
        # warnings raised inside these blocks are silenced -- presumably the
        # deprecation warning from .ix (NOTE(review): confirm)
        with catch_warnings(record=True):
            simplefilter("ignore")
            df.ix[1, 0] = [1, 2, 3]
            df.ix[1, 0] = [1, 2]

        result = DataFrame(index=[0, 1], columns=[0])
        with catch_warnings(record=True):
            simplefilter("ignore")
            result.ix[1, 0] = [1, 2]

        tm.assert_frame_equal(result, df)

        # ix with an object
        class TO:
            # Minimal value wrapper used as an opaque cell value.
            def __init__(self, value):
                self.value = value

            def __str__(self):
                return "[{0}]".format(self.value)

            __repr__ = __str__

            def __eq__(self, other):
                # compares by wrapped value; assumes ``other`` has ``.value``
                return self.value == other.value

            def view(self):
                return self

        df = DataFrame(index=[0, 1], columns=[0])
        with catch_warnings(record=True):
            simplefilter("ignore")
            df.ix[1, 0] = TO(1)
            df.ix[1, 0] = TO(2)

        result = DataFrame(index=[0, 1], columns=[0])
        with catch_warnings(record=True):
            simplefilter("ignore")
            result.ix[1, 0] = TO(2)

        tm.assert_frame_equal(result, df)

        # remains object dtype even after setting it back
        df = DataFrame(index=[0, 1], columns=[0])
        with catch_warnings(record=True):
            simplefilter("ignore")
            df.ix[1, 0] = TO(1)
            df.ix[1, 0] = np.nan
        result = DataFrame(index=[0, 1], columns=[0])

        tm.assert_frame_equal(result, df)

    def test_string_slice(self):
        """String keys against an object-dtype index must raise KeyError."""
        # GH 14424
        # string indexing against a datetimelike index with object dtype
        # should properly raise KeyError
        def assert_2011_missing(frame):
            with pytest.raises(KeyError, match="'2011'"):
                frame["2011"]

            with pytest.raises(KeyError, match="'2011'"):
                frame.loc["2011", 0]

        dated_index = Index([pd.Timestamp("2011-01-01")], dtype=object)
        dated = DataFrame([1], dated_index)
        assert dated.index.is_all_dates
        assert_2011_missing(dated)

        empty = DataFrame()
        assert not empty.index.is_all_dates
        assert_2011_missing(empty)

    def test_astype_assignment(self):
        """Assigning dtype-converted columns back via iloc/loc updates them."""
        def one_row(*cells):
            # single-row frame with columns A..G
            return DataFrame([list(cells)], columns=list("ABCDEFG"))

        template = one_row("1", "2", "3", ".4", 5, 6.0, "foo")

        # GH4312 (iloc)
        frame = template.copy()
        frame.iloc[:, 0:2] = frame.iloc[:, 0:2].astype(np.int64)
        tm.assert_frame_equal(frame, one_row(1, 2, "3", ".4", 5, 6.0, "foo"))

        frame = template.copy()
        frame.iloc[:, 0:2] = frame.iloc[:, 0:2]._convert(datetime=True,
                                                         numeric=True)
        tm.assert_frame_equal(frame, one_row(1, 2, "3", ".4", 5, 6.0, "foo"))

        # GH5702 (loc)
        frame = template.copy()
        frame.loc[:, "A"] = frame.loc[:, "A"].astype(np.int64)
        tm.assert_frame_equal(frame,
                              one_row(1, "2", "3", ".4", 5, 6.0, "foo"))

        frame = template.copy()
        frame.loc[:, ["B", "C"]] = frame.loc[:, ["B", "C"]].astype(np.int64)
        tm.assert_frame_equal(frame, one_row("1", 2, 3, ".4", 5, 6.0, "foo"))

        # full replacements / no nans
        as_ints = DataFrame({"A": [1, 2, 3, 4]})

        frame = DataFrame({"A": [1.0, 2.0, 3.0, 4.0]})
        frame.iloc[:, 0] = frame["A"].astype(np.int64)
        tm.assert_frame_equal(frame, as_ints)

        frame = DataFrame({"A": [1.0, 2.0, 3.0, 4.0]})
        frame.loc[:, "A"] = frame["A"].astype(np.int64)
        tm.assert_frame_equal(frame, DataFrame({"A": [1, 2, 3, 4]}))

    @pytest.mark.parametrize(
        "index,val",
        [
            (Index([0, 1, 2]), 2),
            (Index([0, 1, "2"]), "2"),
            (Index([0, 1, 2, np.inf, 4]), 4),
            (Index([0, 1, 2, np.nan, 4]), 4),
            (Index([0, 1, 2, np.inf]), np.inf),
            (Index([0, 1, 2, np.nan]), np.nan),
        ],
    )
    def test_index_contains(self, index, val):
        """Every parametrized ``val`` must be reported as a member of ``index``."""
        is_member = val in index
        assert is_member

    @pytest.mark.parametrize(
        "index,val",
        [
            (Index([0, 1, 2]), "2"),
            (Index([0, 1, "2"]), 2),
            (Index([0, 1, 2, np.inf]), 4),
            (Index([0, 1, 2, np.nan]), 4),
            (Index([0, 1, 2, np.inf]), np.nan),
            (Index([0, 1, 2, np.nan]), np.inf),
            # Looking up np.inf / np.nan in an integer index must not raise
            # an OverflowError; related to GH 16957
            (pd.Int64Index([0, 1, 2]), np.inf),
            (pd.Int64Index([0, 1, 2]), np.nan),
            (pd.UInt64Index([0, 1, 2]), np.inf),
            (pd.UInt64Index([0, 1, 2]), np.nan),
        ],
    )
    def test_index_not_contains(self, index, val):
        """Every parametrized ``val`` must be reported as absent from ``index``."""
        is_member = val in index
        assert not is_member

    @pytest.mark.parametrize(
        "index,val", [(Index([0, 1, "2"]), present) for present in (0, "2")]
    )
    def test_mixed_index_contains(self, index, val):
        """Labels stored in a mixed int/str index are found (GH 19860)."""
        found = val in index
        assert found

    @pytest.mark.parametrize(
        "index,val", [(Index([0, 1, "2"]), absent) for absent in ("1", 2)]
    )
    def test_mixed_index_not_contains(self, index, val):
        """Look-alike labels of the other type are not found (GH 19860)."""
        found = val in index
        assert not found

    def test_contains_with_float_index(self):
        """Membership of int/float scalars in integer and float indexes (GH#22085)."""
        whole_number_indexes = (
            pd.Int64Index([0, 1, 2, 3]),
            pd.UInt64Index([0, 1, 2, 3]),
        )
        fractional_index = pd.Float64Index([0.1, 1.1, 2.2, 3.3])

        # integer-valued floats match integer labels, fractional ones do not
        for idx in whole_number_indexes:
            assert 1.1 not in idx
            assert 1.0 in idx
            assert 1 in idx

        # only the exact stored float is a member of the float index
        assert 1.1 in fractional_index
        assert 1.0 not in fractional_index
        assert 1 not in fractional_index

    def test_index_type_coercion(self):
        """Setting a label of a different type coerces the index (GH 11836).

        If we have an index type and set it with something that looks to
        numpy like the same, but actually is not (e.g. setting with a float
        or the string '0'), the index has to change type: float labels give
        a floating index, string labels an object index.
        """
        # the three ways of setting that are exercised: .ix, .loc and []
        accessors = [lambda x: x.ix, lambda x: x.loc, lambda x: x]

        with catch_warnings(record=True):
            simplefilter("ignore")

            # integer indexes
            integer_indexed = [Series(range(5)), Series(range(5), index=range(1, 6))]
            for ser in integer_indexed:

                assert ser.index.is_integer()

                for access in accessors:
                    # a fractional float label turns the index floating
                    changed = ser.copy()
                    access(changed)[0.1] = 0
                    assert changed.index.is_floating()
                    assert access(changed)[0.1] == 0

                    # 0.0 either hits the existing label 0 or appends a 0
                    changed = ser.copy()
                    access(changed)[0.0] = 0
                    if 0 in ser:
                        wanted_index = ser.index
                    else:
                        wanted_index = Index(ser.index.tolist() + [0])
                    tm.assert_index_equal(changed.index, wanted_index)

                    # a string label turns the index into object dtype
                    changed = ser.copy()
                    access(changed)["0"] = 0
                    assert changed.index.is_object()

            # float index
            ser = Series(range(5), index=np.arange(5.0))

            assert ser.index.is_floating()

            for access in accessors:

                changed = ser.copy()
                access(changed)[0.1] = 0
                assert changed.index.is_floating()
                assert access(changed)[0.1] == 0

                # 0.0 is already a label, so the index is unchanged
                changed = ser.copy()
                access(changed)[0.0] = 0
                tm.assert_index_equal(changed.index, ser.index)

                changed = ser.copy()
                access(changed)["0"] = 0
                assert changed.index.is_object()
Exemplo n.º 3
0
import pandas as pd
import pandas.util.testing as tm
import numpy as np
# Seed NumPy's global random state before the helper frames below are built.
np.random.seed(444)

# First rows (DataFrame.head()) of the testing-helper time frame, freq="M".
tijd = tm.makeTimeDataFrame(freq="M").head()

# First rows of the default testing-helper frame.
adf2 = tm.makeDataFrame().head()

# Print every item produced by tm.all_index_generator (called with its
# default arguments).
a = tm.all_index_generator()
for num in a:
    print(num)
Exemplo n.º 4
0
class TestSeriesMap:
    """Tests for ``Series.map`` (plus two ``Series.apply`` regression tests)."""

    def test_map(self, datetime_series):
        """Map through a Series, a dict and a function, incl. categoricals."""
        # only the data dict of the helper's (index, data) pair is needed
        _, data = tm.getMixedTypeDict()

        source = Series(data["B"], index=data["C"])
        target = Series(data["C"][:4], index=data["D"][:4])

        merged = target.map(source)

        for k, v in merged.items():
            assert v == source[target[k]]

        # input could be a dict
        merged = target.map(source.to_dict())

        for k, v in merged.items():
            assert v == source[target[k]]

        # function
        result = datetime_series.map(lambda x: x * 2)
        tm.assert_series_equal(result, datetime_series * 2)

        # GH 10324
        a = Series([1, 2, 3, 4])
        b = Series(["even", "odd", "even", "odd"], dtype="category")
        c = Series(["even", "odd", "even", "odd"])

        exp = Series(["odd", "even", "odd", np.nan], dtype="category")
        tm.assert_series_equal(a.map(b), exp)
        exp = Series(["odd", "even", "odd", np.nan])
        tm.assert_series_equal(a.map(c), exp)

        a = Series(["a", "b", "c", "d"])
        b = Series([1, 2, 3, 4],
                   index=pd.CategoricalIndex(["b", "c", "d", "e"]))
        c = Series([1, 2, 3, 4], index=Index(["b", "c", "d", "e"]))

        exp = Series([np.nan, 1, 2, 3])
        tm.assert_series_equal(a.map(b), exp)
        exp = Series([np.nan, 1, 2, 3])
        tm.assert_series_equal(a.map(c), exp)

        a = Series(["a", "b", "c", "d"])
        b = Series(
            ["B", "C", "D", "E"],
            dtype="category",
            index=pd.CategoricalIndex(["b", "c", "d", "e"]),
        )
        c = Series(["B", "C", "D", "E"], index=Index(["b", "c", "d", "e"]))

        exp = Series(
            pd.Categorical([np.nan, "B", "C", "D"],
                           categories=["B", "C", "D", "E"]))
        tm.assert_series_equal(a.map(b), exp)
        exp = Series([np.nan, "B", "C", "D"])
        tm.assert_series_equal(a.map(c), exp)

    @pytest.mark.parametrize("index", tm.all_index_generator(10))
    def test_map_empty(self, index):
        """Mapping through an empty dict yields an all-NaN Series."""
        s = Series(index)
        result = s.map({})

        expected = pd.Series(np.nan, index=s.index)
        tm.assert_series_equal(result, expected)

    def test_map_compat(self):
        """Boolean values can be mapped through a dict keyed by True/False."""
        # related GH 8024
        s = Series([True, True, False], index=[1, 2, 3])
        result = s.map({True: "foo", False: "bar"})
        expected = Series(["foo", "foo", "bar"], index=[1, 2, 3])
        tm.assert_series_equal(result, expected)

    def test_map_int(self):
        """Mapping floats through an int Series gives float64 with NaN for misses."""
        left = Series({"a": 1.0, "b": 2.0, "c": 3.0, "d": 4})
        right = Series({1: 11, 2: 22, 3: 33})

        # np.float64 is the type the removed ``np.float_`` alias pointed to
        assert left.dtype == np.float64
        assert issubclass(right.dtype.type, np.integer)

        merged = left.map(right)
        assert merged.dtype == np.float64
        assert isna(merged["d"])
        assert not isna(merged["c"])

    def test_map_type_inference(self):
        """The result dtype is inferred as integer from the mapped values."""
        s = Series(range(3))
        s2 = s.map(lambda x: np.where(x == 0, 0, 1))
        assert issubclass(s2.dtype.type, np.integer)

    def test_map_decimal(self, string_series):
        """Mapping to ``Decimal`` objects produces an object-dtype Series."""
        from decimal import Decimal

        result = string_series.map(lambda x: Decimal(str(x)))
        assert result.dtype == np.object_
        assert isinstance(result[0], Decimal)

    def test_map_na_exclusion(self):
        """``na_action="ignore"`` gives the same result as ``s * 2`` here."""
        s = Series([1.5, np.nan, 3, np.nan, 5])

        result = s.map(lambda x: x * 2, na_action="ignore")
        exp = s * 2
        tm.assert_series_equal(result, exp)

    def test_map_dict_with_tuple_keys(self):
        """
        Due to new MultiIndex-ing behaviour in v0.14.0,
        dicts with tuple keys passed to map were being
        converted to a multi-index, preventing tuple values
        from being mapped properly.
        """
        # GH 18496
        df = pd.DataFrame({"a": [(1, ), (2, ), (3, 4), (5, 6)]})
        label_mappings = {(1, ): "A", (2, ): "B", (3, 4): "A", (5, 6): "B"}

        df["labels"] = df["a"].map(label_mappings)
        df["expected_labels"] = pd.Series(["A", "B", "A", "B"], index=df.index)
        # All labels should be filled now
        tm.assert_series_equal(df["labels"],
                               df["expected_labels"],
                               check_names=False)

    def test_map_counter(self):
        """A ``Counter`` mapping supplies 0 for keys it has not counted."""
        s = Series(["a", "b", "c"], index=[1, 2, 3])
        counter = Counter()
        counter["b"] = 5
        counter["c"] += 1
        result = s.map(counter)
        expected = Series([0, 5, 1], index=[1, 2, 3])
        tm.assert_series_equal(result, expected)

    def test_map_defaultdict(self):
        """A ``defaultdict`` mapping supplies its default for missing keys."""
        s = Series([1, 2, 3], index=["a", "b", "c"])
        default_dict = defaultdict(lambda: "blank")
        default_dict[1] = "stuff"
        result = s.map(default_dict)
        expected = Series(["stuff", "blank", "blank"], index=["a", "b", "c"])
        tm.assert_series_equal(result, expected)

    def test_map_dict_na_key(self):
        """A ``np.nan`` dict key is used to map NaN values."""
        # https://github.com/pandas-dev/pandas/issues/17648
        # Checks that np.nan key is appropriately mapped
        s = Series([1, 2, np.nan])
        expected = Series(["a", "b", "c"])
        result = s.map({1: "a", 2: "b", np.nan: "c"})
        tm.assert_series_equal(result, expected)

    def test_map_dict_subclass_with_missing(self):
        """
        Test Series.map with a dictionary subclass that defines __missing__,
        i.e. sets a default value (GH #15999).
        """
        class DictWithMissing(dict):
            def __missing__(self, key):
                return "missing"

        s = Series([1, 2, 3])
        dictionary = DictWithMissing({3: "three"})
        result = s.map(dictionary)
        expected = Series(["missing", "missing", "three"])
        tm.assert_series_equal(result, expected)

    def test_map_dict_subclass_without_missing(self):
        """A dict subclass without ``__missing__`` maps unknown keys to NaN."""
        class DictWithoutMissing(dict):
            pass

        s = Series([1, 2, 3])
        dictionary = DictWithoutMissing({3: "three"})
        result = s.map(dictionary)
        expected = Series([np.nan, np.nan, "three"])
        tm.assert_series_equal(result, expected)

    def test_map_box(self):
        """The mapped function receives boxed Timestamp/Timedelta/Period scalars."""
        vals = [pd.Timestamp("2011-01-01"), pd.Timestamp("2011-01-02")]
        s = pd.Series(vals)
        assert s.dtype == "datetime64[ns]"
        # boxed value must be Timestamp instance
        res = s.map(
            lambda x: "{0}_{1}_{2}".format(x.__class__.__name__, x.day, x.tz))
        exp = pd.Series(["Timestamp_1_None", "Timestamp_2_None"])
        tm.assert_series_equal(res, exp)

        # tz-aware timestamps
        vals = [
            pd.Timestamp("2011-01-01", tz="US/Eastern"),
            pd.Timestamp("2011-01-02", tz="US/Eastern"),
        ]
        s = pd.Series(vals)
        assert s.dtype == "datetime64[ns, US/Eastern]"
        res = s.map(
            lambda x: "{0}_{1}_{2}".format(x.__class__.__name__, x.day, x.tz))
        exp = pd.Series(["Timestamp_1_US/Eastern", "Timestamp_2_US/Eastern"])
        tm.assert_series_equal(res, exp)

        # timedelta
        vals = [pd.Timedelta("1 days"), pd.Timedelta("2 days")]
        s = pd.Series(vals)
        assert s.dtype == "timedelta64[ns]"
        res = s.map(lambda x: "{0}_{1}".format(x.__class__.__name__, x.days))
        exp = pd.Series(["Timedelta_1", "Timedelta_2"])
        tm.assert_series_equal(res, exp)

        # period
        vals = [
            pd.Period("2011-01-01", freq="M"),
            pd.Period("2011-01-02", freq="M")
        ]
        s = pd.Series(vals)
        assert s.dtype == "Period[M]"
        res = s.map(
            lambda x: "{0}_{1}".format(x.__class__.__name__, x.freqstr))
        exp = pd.Series(["Period_M", "Period_M"])
        tm.assert_series_equal(res, exp)

    def test_map_categorical(self):
        """Mapping a categorical Series: category-preserving vs. collapsing."""
        values = pd.Categorical(list("ABBABCD"),
                                categories=list("DCBA"),
                                ordered=True)
        s = pd.Series(values, name="XX", index=list("abcdefg"))

        # a one-to-one function keeps the result categorical
        result = s.map(lambda x: x.lower())
        exp_values = pd.Categorical(list("abbabcd"),
                                    categories=list("dcba"),
                                    ordered=True)
        exp = pd.Series(exp_values, name="XX", index=list("abcdefg"))
        tm.assert_series_equal(result, exp)
        tm.assert_categorical_equal(result.values, exp_values)

        # mapping every category to the same value gives an object Series
        result = s.map(lambda x: "A")
        exp = pd.Series(["A"] * 7, name="XX", index=list("abcdefg"))
        tm.assert_series_equal(result, exp)
        # np.object_ rather than the deprecated ``np.object`` alias
        assert result.dtype == np.object_

        with pytest.raises(NotImplementedError):
            s.map(lambda x: x, na_action="ignore")

    def test_map_datetimetz(self):
        """Mapping over tz-aware datetimes: tz kept, dtype change, boxing."""
        values = pd.date_range("2011-01-01", "2011-01-02",
                               freq="H").tz_localize("Asia/Tokyo")
        s = pd.Series(values, name="XX")

        # keep tz
        result = s.map(lambda x: x + pd.offsets.Day())
        exp_values = pd.date_range("2011-01-02", "2011-01-03",
                                   freq="H").tz_localize("Asia/Tokyo")
        exp = pd.Series(exp_values, name="XX")
        tm.assert_series_equal(result, exp)

        # change dtype
        # GH 14506 : Returned dtype changed from int32 to int64
        result = s.map(lambda x: x.hour)
        exp = pd.Series(list(range(24)) + [0], name="XX", dtype=np.int64)
        tm.assert_series_equal(result, exp)

        with pytest.raises(NotImplementedError):
            s.map(lambda x: x, na_action="ignore")

        # not vectorized
        def f(x):
            if not isinstance(x, pd.Timestamp):
                raise ValueError
            return str(x.tz)

        result = s.map(f)
        exp = pd.Series(["Asia/Tokyo"] * 25, name="XX")
        tm.assert_series_equal(result, exp)

    @pytest.mark.parametrize(
        "vals,mapping,exp",
        [
            (list("abc"), {np.nan: "not NaN"}, [np.nan] * 3 + ["not NaN"]),
            (list("abc"), {"a": "a letter"}, ["a letter"] + [np.nan] * 3),
            (list(range(3)), {0: 42}, [42] + [np.nan] * 3),
        ],
    )
    def test_map_missing_mixed(self, vals, mapping, exp):
        """Dict mapping of values followed by a trailing NaN (GH20495)."""
        # GH20495
        s = pd.Series(vals + [np.nan])
        result = s.map(mapping)

        tm.assert_series_equal(result, pd.Series(exp))

    @pytest.mark.parametrize(
        "dti,exp",
        [
            (
                Series([1, 2], index=pd.DatetimeIndex([0, 31536000000])),
                DataFrame(np.repeat([[1, 2]], 2, axis=0), dtype="int64"),
            ),
            (
                tm.makeTimeSeries(nper=30),
                DataFrame(np.repeat([[1, 2]], 30, axis=0), dtype="int64"),
            ),
        ],
    )
    def test_apply_series_on_date_time_index_aware_series(self, dti, exp):
        """``apply`` returning a Series works on tz-aware values (GH 25959)."""
        # GH 25959
        # Calling apply on a localized time series should not cause an error
        index = dti.tz_localize("UTC").index
        result = pd.Series(index).apply(lambda x: pd.Series([1, 2]))
        tm.assert_frame_equal(result, exp)

    def test_apply_scaler_on_date_time_index_aware_series(self):
        """``apply`` returning a scalar works on tz-aware values (GH 25959)."""
        # GH 25959
        # Calling apply on a localized time series should not cause an error
        series = tm.makeTimeSeries(nper=30).tz_localize("UTC")
        result = pd.Series(series.index).apply(lambda x: 1)
        tm.assert_series_equal(result, pd.Series(np.ones(30), dtype="int64"))
Exemplo n.º 5
0
class TestFancy(Base):
    """ pure get/set item & fancy indexing """
    def test_setitem_ndarray_1d(self):
        """Assign 1d ndarrays through ``.loc`` and a row slice (GH5508).

        A 4-element array assigned to a 3-row ``.loc`` selection must raise
        ValueError; assigned to a 4-row selection it must round-trip. A
        1d array assigned to a row slice of the two-column frame must also
        raise ValueError.
        """
        # GH5508

        # len of indexer vs length of the 1d ndarray
        df = DataFrame(index=Index(np.arange(1, 11)))
        df['foo'] = np.zeros(10, dtype=np.float64)
        # np.complex128 is the dtype the deprecated ``np.complex`` alias
        # (removed in NumPy 1.24) resolved to
        df['bar'] = np.zeros(10, dtype=np.complex128)

        # invalid
        with pytest.raises(ValueError):
            df.loc[df.index[2:5],
                   'bar'] = np.array([2.33j, 1.23 + 0.1j, 2.2, 1.0])

        # valid
        df.loc[df.index[2:6], 'bar'] = np.array([2.33j, 1.23 + 0.1j, 2.2, 1.0])

        result = df.loc[df.index[2:6], 'bar']
        expected = Series([2.33j, 1.23 + 0.1j, 2.2, 1.0],
                          index=[3, 4, 5, 6],
                          name='bar')
        tm.assert_series_equal(result, expected)

        # dtype getting changed?
        df = DataFrame(index=Index(np.arange(1, 11)))
        df['foo'] = np.zeros(10, dtype=np.float64)
        df['bar'] = np.zeros(10, dtype=np.complex128)

        with pytest.raises(ValueError):
            df[2:5] = np.arange(1, 4) * 1j

    @pytest.mark.parametrize(
        'index', tm.all_index_generator(5), ids=lambda x: type(x).__name__)
    @pytest.mark.parametrize(
        'obj',
        [
            lambda i: Series(np.arange(len(i)), index=i),
            lambda i: DataFrame(
                np.random.randn(len(i), len(i)), index=i, columns=i),
        ],
        ids=['Series', 'DataFrame'])
    @pytest.mark.parametrize(
        'idxr, idxr_id',
        [
            (lambda x: x, 'getitem'),
            (lambda x: x.loc, 'loc'),
            (lambda x: x.iloc, 'iloc'),
            pytest.param(lambda x: x.ix, 'ix', marks=ignore_ix),
        ])
    def test_getitem_ndarray_3d(self, index, obj, idxr, idxr_id):
        """Index a Series/DataFrame with a 3d ndarray key (GH 25567).

        ``obj`` builds the container from ``index`` and ``idxr`` picks the
        accessor named by ``idxr_id``. For plain ``[]`` on a Series with the
        index kinds listed below the lookup is simply executed; every other
        combination must raise with a message matching ``msg``.
        """
        container = obj(index)
        accessor = idxr(container)
        nd3 = np.random.randint(5, size=(2, 2, 2))

        msg = (
            r"Buffer has wrong number of dimensions \(expected 1,"
            r" got 3\)|"
            "The truth value of an array with more than one element is"
            " ambiguous|"
            "Cannot index with multidimensional key|"
            r"Wrong number of dimensions. values.ndim != ndim \[3 != 1\]|"
            "unhashable type: 'numpy.ndarray'"  # TypeError
        )

        plain_getitem = idxr_id == 'getitem'
        non_raising_kinds = [
            'string', 'datetime64', 'period', 'timedelta64', 'boolean',
            'categorical'
        ]

        # Series.__getitem__ on these index kinds is not expected to raise
        if (plain_getitem and isinstance(container, Series)
                and index.inferred_type in non_raising_kinds):
            accessor[nd3]
            return

        # DataFrame.__getitem__ with a boolean index fails with TypeError,
        # everything else with ValueError
        if (plain_getitem and isinstance(container, DataFrame)
                and index.inferred_type == 'boolean'):
            error = TypeError
        else:
            error = ValueError

        with pytest.raises(error, match=msg):
            accessor[nd3]

    @pytest.mark.parametrize(
        'index', tm.all_index_generator(5), ids=lambda x: type(x).__name__)
    @pytest.mark.parametrize(
        'obj',
        [
            lambda i: Series(np.arange(len(i)), index=i),
            lambda i: DataFrame(
                np.random.randn(len(i), len(i)), index=i, columns=i),
        ],
        ids=['Series', 'DataFrame'])
    @pytest.mark.parametrize(
        'idxr, idxr_id',
        [
            (lambda x: x, 'setitem'),
            (lambda x: x.loc, 'loc'),
            (lambda x: x.iloc, 'iloc'),
            pytest.param(lambda x: x.ix, 'ix', marks=ignore_ix),
        ])
    def test_setitem_ndarray_3d(self, index, obj, idxr, idxr_id):
        """Assign ``0`` through a 3d ndarray key (GH 25567).

        ``obj`` builds the container from ``index`` and ``idxr`` picks the
        accessor named by ``idxr_id``. Depending on accessor, container type
        and index kind the assignment is either simply executed or must
        raise one of the listed exception types with a message matching
        ``msg``.
        """
        container = obj(index)
        accessor = idxr(container)
        nd3 = np.random.randint(5, size=(2, 2, 2))

        msg = (
            r"Buffer has wrong number of dimensions \(expected 1,"
            r" got 3\)|"
            "The truth value of an array with more than one element is"
            " ambiguous|"
            "Only 1-dimensional input arrays are supported|"
            "'pandas._libs.interval.IntervalTree' object has no attribute"
            " 'set_value'|"  # AttributeError
            "unhashable type: 'numpy.ndarray'|"  # TypeError
            r"^\[\[\["  # pandas.core.indexing.IndexingError
        )

        # decide per accessor whether the assignment is expected not to raise
        if idxr_id == 'iloc':
            no_error = True
        elif idxr_id == 'setitem':
            no_error = (isinstance(container, Series)
                        and index.inferred_type in [
                            'floating', 'string', 'datetime64', 'period',
                            'timedelta64', 'boolean', 'categorical'
                        ])
        elif idxr_id == 'ix':
            no_error = index.inferred_type in [
                'string', 'datetime64', 'period', 'boolean'
            ]
        else:
            no_error = False

        if no_error:
            accessor[nd3] = 0
            return

        failures = (ValueError, AttributeError, TypeError,
                    pd.core.indexing.IndexingError)
        with pytest.raises(failures, match=msg):
            accessor[nd3] = 0

    def test_inf_upcast(self):
        """``np.inf`` works as a label and the axis becomes a Float64Index.

        GH 16957: enlarging with ``np.inf`` is checked once for the rows and
        once for the columns.
        """
        # np.inf among the row labels
        by_row = DataFrame(columns=[0])
        for label, value in [(1, 1), (2, 2), (np.inf, 3)]:
            by_row.loc[label] = value

        # make sure we can look up the value
        assert by_row.loc[np.inf, 0] == 3

        tm.assert_index_equal(by_row.index, pd.Float64Index([1, 2, np.inf]))

        # np.inf among the column labels
        by_col = DataFrame()
        for position, value in [((0, 0), 1), ((1, 1), 2), ((0, np.inf), 3)]:
            by_col.loc[position] = value

        tm.assert_index_equal(by_col.columns, pd.Float64Index([0, 1, np.inf]))

    def test_setitem_dtype_upcast(self):
        """Setting one cell to a value of another type changes only that column.

        GH3216 covers a string written into an all-NaN float column; GH10280
        checks that the neighbouring columns keep their integer/float dtype.
        """
        # GH3216
        records = DataFrame([{"a": 1}, {"a": 3, "b": 2}])
        records['c'] = np.nan
        assert records['c'].dtype == np.float64

        records.loc[0, 'c'] = 'foo'
        first_row = {"a": 1, "c": 'foo'}
        second_row = {"a": 3, "b": 2, "c": np.nan}
        tm.assert_frame_equal(records, DataFrame([first_row, second_row]))

        # GH10280
        rows = list('ab')
        cols = ['foo', 'bar', 'baz']
        ints = DataFrame(np.arange(6, dtype='int64').reshape(2, 3),
                         index=rows, columns=cols)

        for val in [3.14, 'wxyz']:
            left = ints.copy()
            left.loc['a', 'bar'] = val
            right = DataFrame([[0, val, 2], [3, 4, 5]],
                              index=rows, columns=cols)

            tm.assert_frame_equal(left, right)
            # untouched columns stay integer
            assert is_integer_dtype(left['foo'])
            assert is_integer_dtype(left['baz'])

        tenths = np.arange(6, dtype='int64').reshape(2, 3) / 10.0
        left = DataFrame(tenths, index=rows, columns=cols)
        left.loc['a', 'bar'] = 'wxyz'

        right = DataFrame([[0, 'wxyz', 0.2], [0.3, 0.4, 0.5]],
                          index=rows, columns=cols)

        tm.assert_frame_equal(left, right)
        # untouched columns stay float
        assert is_float_dtype(left['foo'])
        assert is_float_dtype(left['baz'])

    def test_dups_fancy_indexing(self):
        """List-based selection on frames with duplicate labels.

        Walks through several regressions (GH 3455, 3561, 5553, 4619):
        duplicate column names, duplicate row labels selected in a different
        order, and selectors containing labels that are missing from the
        index (which are expected to emit a FutureWarning, or a KeyError
        when no requested label exists at all).
        """

        # GH 3455
        from pandas.util.testing import makeCustomDataframe as mkdf
        df = mkdf(10, 3)
        df.columns = ['a', 'a', 'b']
        # selecting ['b', 'a'] returns 'b' followed by both 'a' columns
        result = df[['b', 'a']].columns
        expected = Index(['b', 'a', 'a'])
        tm.assert_index_equal(result, expected)

        # across dtypes
        df = DataFrame([[1, 2, 1., 2., 3., 'foo', 'bar']],
                       columns=list('aaaaaaa'))
        # results are discarded; these calls only need to run without raising
        df.head()
        str(df)
        result = DataFrame([[1, 2, 1., 2., 3., 'foo', 'bar']])
        result.columns = list('aaaaaaa')

        # TODO(wesm): unused?
        df_v = df.iloc[:, 4]  # noqa
        res_v = result.iloc[:, 4]  # noqa

        tm.assert_frame_equal(df, result)

        # GH 3561, dups not in selected order
        df = DataFrame(
            {
                'test': [5, 7, 9, 11],
                'test1': [4., 5, 6, 7],
                'other': list('abcd')
            },
            index=['A', 'A', 'B', 'C'])
        rows = ['C', 'B']
        expected = DataFrame(
            {
                'test': [11, 9],
                'test1': [7., 6],
                'other': ['d', 'c']
            },
            index=rows)
        result = df.loc[rows]
        tm.assert_frame_equal(result, expected)

        # same selection with an Index instead of a list
        result = df.loc[Index(rows)]
        tm.assert_frame_equal(result, expected)

        # 'E' is not in the index: the expected frame has a NaN row for it
        rows = ['C', 'B', 'E']
        expected = DataFrame(
            {
                'test': [11, 9, np.nan],
                'test1': [7., 6, np.nan],
                'other': ['d', 'c', np.nan]
            },
            index=rows)

        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            result = df.loc[rows]
        tm.assert_frame_equal(result, expected)

        # see GH5553, make sure we use the right indexer
        rows = ['F', 'G', 'H', 'C', 'B', 'E']
        expected = DataFrame(
            {
                'test': [np.nan, np.nan, np.nan, 11, 9, np.nan],
                'test1': [np.nan, np.nan, np.nan, 7., 6, np.nan],
                'other': [np.nan, np.nan, np.nan, 'd', 'c', np.nan]
            },
            index=rows)
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            result = df.loc[rows]
        tm.assert_frame_equal(result, expected)

        # List containing only missing label
        dfnu = DataFrame(np.random.randn(5, 3), index=list('AABCD'))
        with pytest.raises(KeyError):
            dfnu.loc[['E']]

        # ToDo: check_index_type can be True after GH 11497

        # GH 4619; duplicate indexer with missing label
        df = DataFrame({"A": [0, 1, 2]})
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            result = df.loc[[0, 8, 0]]
        expected = DataFrame({"A": [0, np.nan, 0]}, index=[0, 8, 0])
        tm.assert_frame_equal(result, expected, check_index_type=False)

        # same selector on a string column
        df = DataFrame({"A": list('abc')})
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            result = df.loc[[0, 8, 0]]
        expected = DataFrame({"A": ['a', np.nan, 'a']}, index=[0, 8, 0])
        tm.assert_frame_equal(result, expected, check_index_type=False)

        # non unique with non unique selector
        df = DataFrame({'test': [5, 7, 9, 11]}, index=['A', 'A', 'B', 'C'])
        expected = DataFrame({'test': [5, 7, 5, 7, np.nan]},
                             index=['A', 'A', 'A', 'A', 'E'])
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            result = df.loc[['A', 'A', 'E']]
        tm.assert_frame_equal(result, expected)

    def test_dups_fancy_indexing2(self):
        """``.loc`` with duplicate labels on either axis (GH 5835, GH 6504)."""
        # GH 5835
        # dups on index and missing values
        dup_cols = DataFrame(np.random.randn(5, 5),
                             columns=['A', 'B', 'B', 'B', 'A'])

        present = dup_cols.loc[:, ['A', 'B']]
        # the missing label 'C' is expected as one all-NaN column
        absent = DataFrame(np.nan, columns=['C'], index=dup_cols.index)
        expected = pd.concat([present, absent], axis=1)
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            result = dup_cols.loc[:, ['A', 'B', 'C']]
        tm.assert_frame_equal(result, expected)

        # GH 6504, multi-axis indexing
        dup_rows = DataFrame(np.random.randn(9, 2),
                             index=[1, 1, 1, 2, 2, 2, 3, 3, 3],
                             columns=['a', 'b'])

        # rows only
        tm.assert_frame_equal(dup_rows.loc[[1, 2]], dup_rows.iloc[0:6])

        # columns only
        tm.assert_frame_equal(dup_rows.loc[:, ['a', 'b']], dup_rows)

        # rows and columns together
        tm.assert_frame_equal(dup_rows.loc[[1, 2], ['a', 'b']],
                              dup_rows.iloc[0:6, :])

    @pytest.mark.parametrize('case', [lambda s: s, lambda s: s.loc])
    def test_duplicate_int_indexing(self, case):
        """``[[1]]`` on a duplicated int label equals the scalar lookup (GH 17347)."""
        ser = pd.Series(range(3), index=[1, 1, 3])
        by_scalar = ser[1]
        by_list = case(ser)[[1]]
        tm.assert_series_equal(by_list, by_scalar)

    def test_indexing_mixed_frame_bug(self):
        """Masked ``.loc`` assignment must update a mixed-dtype frame (GH3492).

        In the original report the new column was created correctly, but the
        masked assignment below was not reflected consistently: depending on
        how the cell was read (the frame's display, ``.ix`` or ``.iloc``) it
        showed either ``'_'`` or ``'-----'``.
        """
        frame = DataFrame({
            'a': {1: 'aaa', 2: 'bbb', 3: 'ccc'},
            'b': {1: 111, 2: 222, 3: 333},
        })

        # create the new column; this part already worked in the report
        frame['test'] = frame['a'].apply(lambda x: '_' if x == 'aaa' else x)

        # overwrite the flagged rows through a boolean mask
        flagged = frame['test'] == '_'
        replacement = frame.loc[flagged, 'a'].apply(
            lambda x: '-----' if x == 'aaa' else x)
        frame.loc[flagged, 'test'] = replacement

        # the positional read must see the new value
        assert frame.iloc[0, 2] == '-----'

    def test_multitype_list_index_access(self):
        """List lookup with unknown labels on mixed str/int columns (GH 10610)."""
        mixed_columns = ["a"] + [20, 21, 22, 23]
        frame = DataFrame(np.random.random((10, 5)), columns=mixed_columns)

        # 26 and -8 are not column labels
        with pytest.raises(KeyError):
            frame[[22, 26, -8]]

        # a single existing int label returns a full-length column
        n_rows = frame.shape[0]
        assert frame[21].shape[0] == n_rows

    def test_set_index_nan(self):
        """set_index/reset_index round-trips when a key column has NaN (GH 3586)."""
        # every column is a {row label: value} dict over the labels 17..30
        labels = range(17, 31)
        pruid = ['nonQC', 'nonQC', 'nonQC', '10', '11', '12', '13', '24',
                 '35', '46', '47', '48', '59', '10']
        qc = [0.0] * 3 + [np.nan] * 4 + [1.0] + [np.nan] * 6
        data = [
            7.9544899999999998,
            8.0142609999999994,
            7.8591520000000008,
            0.86140349999999999,
            0.87853110000000001,
            0.8427041999999999,
            0.78587700000000005,
            0.73062459999999996,
            0.81668560000000001,
            0.81927080000000008,
            0.80705009999999999,
            0.81440240000000008,
            0.80140849999999997,
            0.81307740000000006,
        ]
        year = [2006, 2007, 2008] + [1985] * 10 + [1986]

        df = DataFrame({
            'PRuid': dict(zip(labels, pruid)),
            'QC': dict(zip(labels, qc)),
            'data': dict(zip(labels, data)),
            'year': dict(zip(labels, year)),
        }).reset_index()

        # index on three columns (QC contains NaN), then undo it and restore
        # the original column order
        indexed = df.set_index(['year', 'PRuid', 'QC'])
        result = indexed.reset_index().reindex(columns=df.columns)
        tm.assert_frame_equal(result, df)

    def test_multi_assign(self):
        """Assign a sub-frame, an ndarray and a Series through ``.loc``.

        GH 3626 covers masked assignment of several columns at once; each
        frame/ndarray assignment is performed twice and must give the same
        result both times.
        """
        # GH 3626, an assignment of a sub-df to a df
        base = DataFrame({
            'FC': ['a', 'b', 'a', 'b', 'a', 'b'],
            'PF': [0, 0, 0, 0, 1, 1],
            'col1': list(range(6)),
            'col2': list(range(6, 12)),
        })
        base.iloc[1, 0] = np.nan
        target = base.copy()

        mask = ~target.FC.isna()
        cols = ['col1', 'col2']

        doubled = target * 2
        doubled.iloc[3, 3] = np.nan

        expected = DataFrame({
            'FC': ['a', np.nan, 'a', 'b', 'a', 'b'],
            'PF': [0, 0, 0, 0, 1, 1],
            'col1': Series([0, 1, 4, 6, 8, 10]),
            'col2': [12, 7, 16, np.nan, 20, 22]
        })

        # frame on rhs
        for _ in range(2):
            target.loc[mask, cols] = doubled.loc[mask, cols]
            tm.assert_frame_equal(target, expected)

        # with an ndarray on rhs
        # coerces to float64 because values has float64 dtype
        # GH 14001
        expected = DataFrame({
            'FC': ['a', np.nan, 'a', 'b', 'a', 'b'],
            'PF': [0, 0, 0, 0, 1, 1],
            'col1': [0., 1., 4., 6., 8., 10.],
            'col2': [12, 7, 16, np.nan, 20, 22]
        })
        target = base.copy()
        for _ in range(2):
            target.loc[mask, cols] = doubled.loc[mask, cols].values
            tm.assert_frame_equal(target, expected)

        # broadcasting on the rhs is required
        wide = DataFrame({
            'A': [1, 2, 0, 0, 0],
            'B': [0, 0, 0, 10, 11],
            'C': [0, 0, 0, 10, 11],
            'D': [3, 4, 5, 6, 7],
        })

        # build the expectation one column at a time
        expected = wide.copy()
        zero_rows = expected['A'] == 0
        for col in ['A', 'B']:
            expected.loc[zero_rows, col] = wide['D']

        # then assign the same Series to both columns in one statement
        wide.loc[wide['A'] == 0, ['A', 'B']] = wide['D']
        tm.assert_frame_equal(wide, expected)

    def test_setitem_list(self):

        # GH 6043
        # ix with a list
        df = DataFrame(index=[0, 1], columns=[0])
        with catch_warnings(record=True):
            simplefilter("ignore")
            df.ix[1, 0] = [1, 2, 3]
            df.ix[1, 0] = [1, 2]

        result = DataFrame(index=[0, 1], columns=[0])
        with catch_warnings(record=True):
            simplefilter("ignore")
            result.ix[1, 0] = [1, 2]

        tm.assert_frame_equal(result, df)

        # ix with an object
        class TO:
            def __init__(self, value):
                self.value = value

            def __str__(self):
                return "[{0}]".format(self.value)

            __repr__ = __str__

            def __eq__(self, other):
                return self.value == other.value

            def view(self):
                return self

        df = DataFrame(index=[0, 1], columns=[0])
        with catch_warnings(record=True):
            simplefilter("ignore")
            df.ix[1, 0] = TO(1)
            df.ix[1, 0] = TO(2)

        result = DataFrame(index=[0, 1], columns=[0])
        with catch_warnings(record=True):
            simplefilter("ignore")
            result.ix[1, 0] = TO(2)

        tm.assert_frame_equal(result, df)

        # remains object dtype even after setting it back
        df = DataFrame(index=[0, 1], columns=[0])
        with catch_warnings(record=True):
            simplefilter("ignore")
            df.ix[1, 0] = TO(1)
            df.ix[1, 0] = np.nan
        result = DataFrame(index=[0, 1], columns=[0])

        tm.assert_frame_equal(result, df)

    def test_string_slice(self):
        """String keys on object-dtype date or empty indexes raise KeyError."""
        # GH 14424
        # string indexing against datetimelike with object
        # dtype should properly raise KeyError

        def _assert_year_lookups_fail(frame):
            # both plain getitem and .loc must reject the '2011' key
            with pytest.raises(KeyError):
                frame['2011']

            with pytest.raises(KeyError):
                frame.loc['2011', 0]

        object_dates = Index([pd.Timestamp('2011-01-01')], dtype=object)
        dated = DataFrame([1], index=object_dates)
        assert dated.index.is_all_dates
        _assert_year_lookups_fail(dated)

        empty = DataFrame()
        assert not empty.index.is_all_dates
        _assert_year_lookups_fail(empty)

    def test_astype_assignment(self):

        # GH4312 (iloc)
        df_orig = DataFrame([['1', '2', '3', '.4', 5, 6., 'foo']],
                            columns=list('ABCDEFG'))

        df = df_orig.copy()
        df.iloc[:, 0:2] = df.iloc[:, 0:2].astype(np.int64)
        expected = DataFrame([[1, 2, '3', '.4', 5, 6., 'foo']],
                             columns=list('ABCDEFG'))
        tm.assert_frame_equal(df, expected)

        df = df_orig.copy()
        df.iloc[:, 0:2] = df.iloc[:, 0:2]._convert(datetime=True, numeric=True)
        expected = DataFrame([[1, 2, '3', '.4', 5, 6., 'foo']],
                             columns=list('ABCDEFG'))
        tm.assert_frame_equal(df, expected)

        # GH5702 (loc)
        df = df_orig.copy()
        df.loc[:, 'A'] = df.loc[:, 'A'].astype(np.int64)
        expected = DataFrame([[1, '2', '3', '.4', 5, 6., 'foo']],
                             columns=list('ABCDEFG'))
        tm.assert_frame_equal(df, expected)

        df = df_orig.copy()
        df.loc[:, ['B', 'C']] = df.loc[:, ['B', 'C']].astype(np.int64)
        expected = DataFrame([['1', 2, 3, '.4', 5, 6., 'foo']],
                             columns=list('ABCDEFG'))
        tm.assert_frame_equal(df, expected)

        # full replacements / no nans
        df = DataFrame({'A': [1., 2., 3., 4.]})
        df.iloc[:, 0] = df['A'].astype(np.int64)
        expected = DataFrame({'A': [1, 2, 3, 4]})
        tm.assert_frame_equal(df, expected)

        df = DataFrame({'A': [1., 2., 3., 4.]})
        df.loc[:, 'A'] = df['A'].astype(np.int64)
        expected = DataFrame({'A': [1, 2, 3, 4]})
        tm.assert_frame_equal(df, expected)

    @pytest.mark.parametrize(
        ("index", "val"),
        [
            (Index([0, 1, 2]), 2),
            (Index([0, 1, '2']), '2'),
            (Index([0, 1, 2, np.inf, 4]), 4),
            (Index([0, 1, 2, np.nan, 4]), 4),
            (Index([0, 1, 2, np.inf]), np.inf),
            (Index([0, 1, 2, np.nan]), np.nan),
        ],
    )
    def test_index_contains(self, index, val):
        """Each ``val`` is expected to be found in its ``index`` via ``in``."""
        is_member = val in index
        assert is_member

    @pytest.mark.parametrize(
        ("index", "val"),
        [
            (Index([0, 1, 2]), '2'),
            (Index([0, 1, '2']), 2),
            (Index([0, 1, 2, np.inf]), 4),
            (Index([0, 1, 2, np.nan]), 4),
            (Index([0, 1, 2, np.inf]), np.nan),
            (Index([0, 1, 2, np.nan]), np.inf),
            # Checking if np.inf in Int64Index should not cause an
            # OverflowError
            # Related to GH 16957
            (pd.Int64Index([0, 1, 2]), np.inf),
            (pd.Int64Index([0, 1, 2]), np.nan),
            (pd.UInt64Index([0, 1, 2]), np.inf),
            (pd.UInt64Index([0, 1, 2]), np.nan),
        ],
    )
    def test_index_not_contains(self, index, val):
        """Each ``val`` is expected to be absent from its ``index``."""
        is_member = val in index
        assert not is_member

    @pytest.mark.parametrize(
        ("index", "val"),
        [
            (Index([0, 1, '2']), 0),
            (Index([0, 1, '2']), '2'),
        ],
    )
    def test_mixed_index_contains(self, index, val):
        """Labels of either type stored in a mixed index are found by ``in``."""
        # GH 19860
        is_member = val in index
        assert is_member

    @pytest.mark.parametrize(
        ("index", "val"),
        [
            (Index([0, 1, '2']), '1'),
            (Index([0, 1, '2']), 2),
        ],
    )
    def test_mixed_index_not_contains(self, index, val):
        """A value of the other type does not match a mixed-index label."""
        # GH 19860
        is_member = val in index
        assert not is_member

    def test_contains_with_float_index(self):
        # GH#22085
        integer_index = pd.Int64Index([0, 1, 2, 3])
        uinteger_index = pd.UInt64Index([0, 1, 2, 3])
        float_index = pd.Float64Index([0.1, 1.1, 2.2, 3.3])

        for index in (integer_index, uinteger_index):
            assert 1.1 not in index
            assert 1.0 in index
            assert 1 in index

        assert 1.1 in float_index
        assert 1.0 not in float_index
        assert 1 not in float_index

    def test_index_type_coercion(self):
        """Check how a Series index type changes when new labels are set.

        For integer- and float-indexed Series, a float label (``0.1``,
        ``0.0``) and a string label (``'0'``) are assigned through ``.ix``,
        ``.loc`` and plain ``[]``; the resulting index type is asserted
        after each assignment.
        """

        # all warnings raised inside this block are ignored
        with catch_warnings(record=True):
            simplefilter("ignore")

            # GH 11836
            # if we have an index type and set it with something that looks
            # to numpy like the same, but is actually, not
            # (e.g. setting with a float or string '0')
            # then we need to coerce to object

            # integer indexes
            for s in [Series(range(5)), Series(range(5), index=range(1, 6))]:

                assert s.index.is_integer()

                # each lambda picks the accessor used for the assignment:
                # .ix, .loc, or the Series itself (plain [] setitem)
                for indexer in [lambda x: x.ix, lambda x: x.loc, lambda x: x]:
                    # a non-integral float label turns the index floating
                    s2 = s.copy()
                    indexer(s2)[0.1] = 0
                    assert s2.index.is_floating()
                    assert indexer(s2)[0.1] == 0

                    # label 0.0: the index is expected to stay as it was
                    # when 0 is already present, otherwise to gain a 0
                    # NOTE(review): `0 not in s` presumably tests the
                    # Series' index labels - confirm
                    s2 = s.copy()
                    indexer(s2)[0.0] = 0
                    exp = s.index
                    if 0 not in s:
                        exp = Index(s.index.tolist() + [0])
                    tm.assert_index_equal(s2.index, exp)

                    # a string label makes the index object-typed
                    s2 = s.copy()
                    indexer(s2)['0'] = 0
                    assert s2.index.is_object()

            # float indexes
            for s in [Series(range(5), index=np.arange(5.))]:

                assert s.index.is_floating()

                for idxr in [lambda x: x.ix, lambda x: x.loc, lambda x: x]:

                    # a new float label keeps the index floating
                    s2 = s.copy()
                    idxr(s2)[0.1] = 0
                    assert s2.index.is_floating()
                    assert idxr(s2)[0.1] == 0

                    # setting the existing label 0.0 leaves the index as is
                    s2 = s.copy()
                    idxr(s2)[0.0] = 0
                    tm.assert_index_equal(s2.index, s.index)

                    # a string label makes the index object-typed
                    s2 = s.copy()
                    idxr(s2)['0'] = 0
                    assert s2.index.is_object()