示例#1
0
    def test_basic_types(self):
        """Check the dtypes produced by ``get_dummies`` (GH 10531).

        List and Series input are compared against a ``uint8`` indicator
        frame; for DataFrame input only the dtype counts are checked.
        """
        letters = list('abc')
        frame = DataFrame({'a': [0, 1, 0, 1, 2],
                           'b': ['A', 'A', 'B', 'C', 'C'],
                           'c': [2, 3, 3, 3, 2]})

        indicator = DataFrame({'a': [1, 0, 0],
                               'b': [0, 1, 0],
                               'c': [0, 0, 1]},
                              dtype='uint8',
                              columns=list('abc'))
        if self.sparse:
            # Sparse output is compared against a sparse expectation.
            indicator = indicator.to_sparse(fill_value=0, kind='integer')
            check = tm.assert_sp_frame_equal
        else:
            check = tm.assert_frame_equal

        for data in (letters, Series(letters)):
            check(get_dummies(data, sparse=self.sparse), indicator)

        # Encoding every column yields eight ``uint8`` indicator columns.
        all_encoded = get_dummies(frame, sparse=self.sparse,
                                  columns=frame.columns)
        tm.assert_series_equal(all_encoded.get_dtype_counts(),
                               Series({'uint8': 8}))

        # Encoding only ``a`` keeps one int64 and one object column.
        partly_encoded = get_dummies(frame, sparse=self.sparse, columns=['a'])
        wanted_counts = Series({'uint8': 3, 'int64': 1, 'object': 1})
        tm.assert_series_equal(
            partly_encoded.get_dtype_counts().sort_values(),
            wanted_counts.sort_values())
示例#2
0
 def test_dataframe_dummies_all_obj(self):
     """Encode a frame made of only the fixture's ``A`` and ``B`` columns."""
     subset = self.df[['A', 'B']]
     wanted = DataFrame({'A_a': [1, 0, 1],
                         'A_b': [0, 1, 0],
                         'B_b': [1, 1, 0],
                         'B_c': [0, 0, 1]}, dtype=np.uint8)
     encoded = get_dummies(subset, sparse=self.sparse)
     assert_frame_equal(encoded, wanted)
示例#3
0
    def test_just_na(self):
        """A lone NaN gives an empty frame whose index follows the input."""
        na_values = [np.nan]
        # Each case pairs an input with the index labels expected back.
        cases = [
            (na_values, [0]),
            (Series(na_values), [0]),
            (Series(na_values, index=['A']), ['A']),
        ]
        encoded = [get_dummies(data, sparse=self.sparse) for data, _ in cases]

        for frame in encoded:
            self.assertEqual(frame.empty, True)

        for frame, (_, labels) in zip(encoded, cases):
            self.assertEqual(frame.index.tolist(), labels)
示例#4
0
    def test_just_na(self):
        """A lone NaN gives an empty frame whose index follows the input."""
        na_values = [np.nan]
        # Each case pairs an input with the index labels expected back.
        cases = [
            (na_values, [0]),
            (Series(na_values), [0]),
            (Series(na_values, index=['A']), ['A']),
        ]
        encoded = [get_dummies(data, sparse=self.sparse) for data, _ in cases]

        for frame in encoded:
            assert frame.empty

        for frame, (_, labels) in zip(encoded, cases):
            assert frame.index.tolist() == labels
示例#5
0
    def test_basic_drop_first_one_level(self):
        """With a single level, ``drop_first=True`` leaves no columns."""
        values = list('aaa')

        # The list and the default-indexed Series share one expectation.
        no_columns = DataFrame(index=np.arange(3))
        for data in (values, Series(values)):
            encoded = get_dummies(data, sparse=self.sparse, drop_first=True)
            assert_frame_equal(encoded, no_columns)

        # A labelled Series must keep its own index on the empty result.
        labelled = Series(values, list('ABC'))
        no_columns = DataFrame(index=list('ABC'))
        encoded = get_dummies(labelled, sparse=self.sparse, drop_first=True)
        assert_frame_equal(encoded, no_columns)
    def test_basic_drop_first_one_level(self):
        """With a single level, ``drop_first=True`` leaves no columns."""
        values = list('aaa')

        # The list and the default-indexed Series share one expectation.
        no_columns = DataFrame(index=np.arange(3))
        for data in (values, Series(values)):
            encoded = get_dummies(data, sparse=self.sparse, drop_first=True)
            assert_frame_equal(encoded, no_columns)

        # A labelled Series must keep its own index on the empty result.
        labelled = Series(values, list('ABC'))
        no_columns = DataFrame(index=list('ABC'))
        encoded = get_dummies(labelled, sparse=self.sparse, drop_first=True)
        assert_frame_equal(encoded, no_columns)
示例#7
0
 def test_dataframe_dummies_drop_first(self):
     """Encode fixture columns ``A`` and ``B`` with ``drop_first=True``."""
     subset = self.df[['A', 'B']]
     wanted = DataFrame({'A_b': [0, 1, 0], 'B_c': [0, 0, 1]},
                        dtype=np.uint8)
     encoded = get_dummies(subset, sparse=self.sparse, drop_first=True)
     assert_frame_equal(encoded, wanted)
示例#8
0
 def test_dataframe_dummies_prefix_str(self):
     """Pass one string as ``prefix`` for the whole fixture frame.

     Not something callers should do: the expected columns contain the
     label ``bad_b`` twice.
     """
     rows = [[1, 1, 0, 1, 0],
             [2, 0, 1, 1, 0],
             [3, 1, 0, 0, 1]]
     labels = ['C', 'bad_a', 'bad_b', 'bad_b', 'bad_c']
     # ``C`` is expected as int64; the dummy columns stay uint8.
     wanted = DataFrame(rows, columns=labels,
                        dtype=np.uint8).astype({"C": np.int64})
     encoded = get_dummies(self.df, prefix='bad', sparse=self.sparse)
     assert_frame_equal(encoded, wanted)
示例#9
0
    def test_basic(self):
        """Encode three distinct letters given as a list and as Series."""
        letters = list('abc')
        wanted = DataFrame({'a': {0: 1, 1: 0, 2: 0},
                            'b': {0: 0, 1: 1, 2: 0},
                            'c': {0: 0, 1: 0, 2: 1}}, dtype=np.uint8)

        for data in (letters, Series(letters)):
            assert_frame_equal(get_dummies(data, sparse=self.sparse), wanted)

        # A labelled Series must carry its index through to the result.
        labelled = Series(letters, list('ABC'))
        wanted.index = list('ABC')
        assert_frame_equal(get_dummies(labelled, sparse=self.sparse), wanted)
示例#10
0
    def test_dataframe_dummies_drop_first_with_na(self):
        """Combine ``drop_first=True`` with both settings of ``dummy_na``.

        An all-NaN row labelled 3 is first written into the fixture frame.
        """
        frame = self.df
        frame.loc[3, :] = [np.nan] * 3

        dummy_cols = ['A_b', 'A_nan', 'B_c', 'B_nan']
        wanted = DataFrame({'C': [1, 2, 3, np.nan],
                            'A_b': [0, 1, 0, 0],
                            'A_nan': [0, 0, 0, 1],
                            'B_c': [0, 0, 1, 0],
                            'B_nan': [0, 0, 0, 1]})
        wanted[dummy_cols] = wanted[dummy_cols].astype(np.uint8)
        wanted = wanted[['C'] + dummy_cols]

        with_na = get_dummies(frame, dummy_na=True, sparse=self.sparse,
                              drop_first=True)
        assert_frame_equal(with_na, wanted)

        # Without ``dummy_na`` no ``*_nan`` indicator columns are expected.
        without_na = get_dummies(frame, dummy_na=False, sparse=self.sparse,
                                 drop_first=True)
        assert_frame_equal(without_na, wanted[['C', 'A_b', 'B_c']])
示例#11
0
 def test_dataframe_dummies_subset(self):
     """Encode only column ``A``, using the ``from_A`` prefix."""
     dummy_cols = ['from_A_a', 'from_A_b']
     # ``B`` and ``C`` are expected to pass through unchanged.
     wanted = DataFrame({'from_A_a': [1, 0, 1],
                         'from_A_b': [0, 1, 0],
                         'B': ['b', 'b', 'c'],
                         'C': [1, 2, 3]})
     wanted[dummy_cols] = wanted[dummy_cols].astype(np.uint8)
     encoded = get_dummies(self.df, prefix=['from_A'], columns=['A'],
                           sparse=self.sparse)
     assert_frame_equal(encoded, wanted)
示例#12
0
    def test_dataframe_dummies_prefix_sep(self):
        """Pass ``prefix_sep`` as a string, as a list and as a dict."""
        frame = self.df
        ordered = ['C', 'A..a', 'A..b', 'B..b', 'B..c']
        wanted = DataFrame({'C': [1, 2, 3],
                            'A..a': [1, 0, 1],
                            'A..b': [0, 1, 0],
                            'B..b': [1, 1, 0],
                            'B..c': [0, 0, 1]})[ordered]
        dummy_cols = wanted.columns[1:]
        wanted[dummy_cols] = wanted[dummy_cols].astype(np.uint8)

        encoded = get_dummies(frame, prefix_sep='..', sparse=self.sparse)
        assert_frame_equal(encoded, wanted)

        # Per-column separators: the list and dict forms share one result.
        wanted = wanted.rename(columns={'B..b': 'B__b', 'B..c': 'B__c'})
        for separators in (['..', '__'], {'A': '..', 'B': '__'}):
            encoded = get_dummies(frame, prefix_sep=separators,
                                  sparse=self.sparse)
            assert_frame_equal(encoded, wanted)
示例#13
0
    def test_dataframe_dummies_drop_first_with_na(self):
        """Combine ``drop_first=True`` with both settings of ``dummy_na``.

        An all-NaN row labelled 3 is first written into the fixture frame.
        """
        frame = self.df
        frame.loc[3, :] = [np.nan] * 3

        dummy_cols = ['A_b', 'A_nan', 'B_c', 'B_nan']
        wanted = DataFrame({'C': [1, 2, 3, np.nan],
                            'A_b': [0, 1, 0, 0],
                            'A_nan': [0, 0, 0, 1],
                            'B_c': [0, 0, 1, 0],
                            'B_nan': [0, 0, 0, 1]})
        wanted[dummy_cols] = wanted[dummy_cols].astype(np.uint8)
        wanted = wanted[['C'] + dummy_cols]

        with_na = get_dummies(frame, dummy_na=True, sparse=self.sparse,
                              drop_first=True)
        assert_frame_equal(with_na, wanted)

        # Without ``dummy_na`` no ``*_nan`` indicator columns are expected.
        without_na = get_dummies(frame, dummy_na=False, sparse=self.sparse,
                                 drop_first=True)
        assert_frame_equal(without_na, wanted[['C', 'A_b', 'B_c']])
示例#14
0
    def test_basic(self):
        """Encode three distinct letters given as a list and as Series."""
        letters = list('abc')
        wanted = DataFrame({'a': {0: 1, 1: 0, 2: 0},
                            'b': {0: 0, 1: 1, 2: 0},
                            'c': {0: 0, 1: 0, 2: 1}}, dtype=np.uint8)

        for data in (letters, Series(letters)):
            assert_frame_equal(get_dummies(data, sparse=self.sparse), wanted)

        # A labelled Series must carry its index through to the result.
        labelled = Series(letters, list('ABC'))
        wanted.index = list('ABC')
        assert_frame_equal(get_dummies(labelled, sparse=self.sparse), wanted)
示例#15
0
 def test_dataframe_dummies_prefix_str(self):
     """Pass one string as ``prefix`` for the whole fixture frame.

     Not something callers should do: the expected columns contain the
     label ``bad_b`` twice.
     """
     rows = [[1, 1, 0, 1, 0],
             [2, 0, 1, 1, 0],
             [3, 1, 0, 0, 1]]
     labels = ['C', 'bad_a', 'bad_b', 'bad_b', 'bad_c']
     # ``C`` is expected as int64; the dummy columns stay uint8.
     wanted = DataFrame(rows, columns=labels,
                        dtype=np.uint8).astype({"C": np.int64})
     encoded = get_dummies(self.df, prefix='bad', sparse=self.sparse)
     assert_frame_equal(encoded, wanted)
示例#16
0
 def test_dataframe_dummies_subset(self):
     """Encode only column ``A``, using the ``from_A`` prefix."""
     dummy_cols = ['from_A_a', 'from_A_b']
     # ``B`` and ``C`` are expected to pass through unchanged.
     wanted = DataFrame({'from_A_a': [1, 0, 1],
                         'from_A_b': [0, 1, 0],
                         'B': ['b', 'b', 'c'],
                         'C': [1, 2, 3]})
     wanted[dummy_cols] = wanted[dummy_cols].astype(np.uint8)
     encoded = get_dummies(self.df, prefix=['from_A'], columns=['A'],
                           sparse=self.sparse)
     assert_frame_equal(encoded, wanted)
示例#17
0
    def test_dataframe_dummies_prefix_sep(self):
        """Pass ``prefix_sep`` as a string, as a list and as a dict."""
        frame = self.df
        ordered = ['C', 'A..a', 'A..b', 'B..b', 'B..c']
        wanted = DataFrame({'C': [1, 2, 3],
                            'A..a': [1, 0, 1],
                            'A..b': [0, 1, 0],
                            'B..b': [1, 1, 0],
                            'B..c': [0, 0, 1]})[ordered]
        dummy_cols = wanted.columns[1:]
        wanted[dummy_cols] = wanted[dummy_cols].astype(np.uint8)

        encoded = get_dummies(frame, prefix_sep='..', sparse=self.sparse)
        assert_frame_equal(encoded, wanted)

        # Per-column separators: the list and dict forms share one result.
        wanted = wanted.rename(columns={'B..b': 'B__b', 'B..c': 'B__c'})
        for separators in (['..', '__'], {'A': '..', 'B': '__'}):
            encoded = get_dummies(frame, prefix_sep=separators,
                                  sparse=self.sparse)
            assert_frame_equal(encoded, wanted)
示例#18
0
 def test_dataframe_dummies_mix_default(self):
     """Encode the whole fixture frame with default arguments."""
     dummy_cols = ['A_a', 'A_b', 'B_b', 'B_c']
     wanted = DataFrame({'C': [1, 2, 3],
                         'A_a': [1, 0, 1],
                         'A_b': [0, 1, 0],
                         'B_b': [1, 1, 0],
                         'B_c': [0, 0, 1]})
     # Only the indicator columns are uint8; ``C`` keeps its dtype.
     wanted[dummy_cols] = wanted[dummy_cols].astype(np.uint8)
     wanted = wanted[['C'] + dummy_cols]
     encoded = get_dummies(self.df, sparse=self.sparse)
     assert_frame_equal(encoded, wanted)
示例#19
0
 def test_dataframe_dummies_all_obj(self):
     """Encode a frame made of only the fixture's ``A`` and ``B`` columns."""
     subset = self.df[['A', 'B']]
     wanted = DataFrame({'A_a': [1, 0, 1],
                         'A_b': [0, 1, 0],
                         'B_b': [1, 1, 0],
                         'B_c': [0, 0, 1]}, dtype=np.uint8)
     encoded = get_dummies(subset, sparse=self.sparse)
     assert_frame_equal(encoded, wanted)
示例#20
0
 def test_dataframe_dummies_drop_first_with_categorical(self):
     """Use ``drop_first=True`` on a frame with an added categorical column."""
     frame = self.df
     # The categorical column is written into the fixture frame itself.
     frame['cat'] = pd.Categorical(['x', 'y', 'y'])

     dummy_cols = ['A_b', 'B_c', 'cat_y']
     wanted = DataFrame({'C': [1, 2, 3],
                         'A_b': [0, 1, 0],
                         'B_c': [0, 0, 1],
                         'cat_y': [0, 1, 1]})
     wanted[dummy_cols] = wanted[dummy_cols].astype(np.uint8)
     wanted = wanted[['C'] + dummy_cols]

     encoded = get_dummies(frame, sparse=self.sparse, drop_first=True)
     assert_frame_equal(encoded, wanted)
示例#21
0
 def test_dataframe_dummies_drop_first_with_categorical(self):
     """Use ``drop_first=True`` on a frame with an added categorical column."""
     frame = self.df
     # The categorical column is written into the fixture frame itself.
     frame['cat'] = pd.Categorical(['x', 'y', 'y'])

     dummy_cols = ['A_b', 'B_c', 'cat_y']
     wanted = DataFrame({'C': [1, 2, 3],
                         'A_b': [0, 1, 0],
                         'B_c': [0, 0, 1],
                         'cat_y': [0, 1, 1]})
     wanted[dummy_cols] = wanted[dummy_cols].astype(np.uint8)
     wanted = wanted[['C'] + dummy_cols]

     encoded = get_dummies(frame, sparse=self.sparse, drop_first=True)
     assert_frame_equal(encoded, wanted)
示例#22
0
 def test_dataframe_dummies_mix_default(self):
     """Encode the whole fixture frame with default arguments."""
     dummy_cols = ['A_a', 'A_b', 'B_b', 'B_c']
     wanted = DataFrame({'C': [1, 2, 3],
                         'A_a': [1, 0, 1],
                         'A_b': [0, 1, 0],
                         'B_b': [1, 1, 0],
                         'B_c': [0, 0, 1]})
     # Only the indicator columns are uint8; ``C`` keeps its dtype.
     wanted[dummy_cols] = wanted[dummy_cols].astype(np.uint8)
     wanted = wanted[['C'] + dummy_cols]
     encoded = get_dummies(self.df, sparse=self.sparse)
     assert_frame_equal(encoded, wanted)
示例#23
0
    def test_include_na(self):
        """Check how NaN entries are encoded with and without ``dummy_na``.

        By default the NaN entry gets no column of its own; with
        ``dummy_na=True`` a NaN-labelled indicator column is expected last.
        """
        s = ['a', 'b', np.nan]
        res = get_dummies(s, sparse=self.sparse)
        exp = DataFrame({'a': {0: 1, 1: 0, 2: 0},
                         'b': {0: 0, 1: 1, 2: 0}}, dtype=np.uint8)
        assert_frame_equal(res, exp)

        # Sparse dataframes do not allow nan labelled columns, see #GH8822
        res_na = get_dummies(s, dummy_na=True, sparse=self.sparse)
        exp_na = DataFrame({nan: {0: 0, 1: 0, 2: 1},
                            'a': {0: 1, 1: 0, 2: 0},
                            'b': {0: 0, 1: 1, 2: 0}},
                           dtype=np.uint8)
        # ``reindex(columns=...)`` replaces the deprecated
        # ``reindex_axis(labels, 1)`` spelling; the column order is the same.
        exp_na = exp_na.reindex(columns=['a', 'b', nan])
        # hack (NaN handling in assert_index_equal)
        exp_na.columns = res_na.columns
        assert_frame_equal(res_na, exp_na)

        # For input that is only NaN, just the raw values are compared.
        res_just_na = get_dummies([nan], dummy_na=True, sparse=self.sparse)
        exp_just_na = DataFrame(Series(1, index=[0]), columns=[nan],
                                dtype=np.uint8)
        tm.assert_numpy_array_equal(res_just_na.values, exp_just_na.values)
示例#24
0
    def test_include_na(self):
        """Check how NaN entries are encoded with and without ``dummy_na``.

        By default the NaN entry gets no column of its own; with
        ``dummy_na=True`` a NaN-labelled indicator column is expected last.
        """
        s = ['a', 'b', np.nan]
        res = get_dummies(s, sparse=self.sparse)
        exp = DataFrame({'a': {0: 1, 1: 0, 2: 0},
                         'b': {0: 0, 1: 1, 2: 0}}, dtype=np.uint8)
        assert_frame_equal(res, exp)

        # Sparse dataframes do not allow nan labelled columns, see #GH8822
        res_na = get_dummies(s, dummy_na=True, sparse=self.sparse)
        exp_na = DataFrame({nan: {0: 0, 1: 0, 2: 1},
                            'a': {0: 1, 1: 0, 2: 0},
                            'b': {0: 0, 1: 1, 2: 0}},
                           dtype=np.uint8)
        # ``reindex(columns=...)`` replaces the deprecated
        # ``reindex_axis(labels, 1)`` spelling; the column order is the same.
        exp_na = exp_na.reindex(columns=['a', 'b', nan])
        # hack (NaN handling in assert_index_equal)
        exp_na.columns = res_na.columns
        assert_frame_equal(res_na, exp_na)

        # For input that is only NaN, just the raw values are compared.
        res_just_na = get_dummies([nan], dummy_na=True, sparse=self.sparse)
        exp_just_na = DataFrame(Series(1, index=[0]), columns=[nan],
                                dtype=np.uint8)
        tm.assert_numpy_array_equal(res_just_na.values, exp_just_na.values)
示例#25
0
    def test_dataframe_dummies_preserve_categorical_dtype(self):
        """Categorical input must give CategoricalIndex columns (GH13854).

        Runs for both unordered and ordered categoricals; the unused
        category ``z`` is expected as an all-zero column.
        """
        levels = list("xyz")
        indicator = np.array([[1, 0, 0], [0, 1, 0]], dtype=np.uint8)

        for ordered in (False, True):
            cat = pd.Categorical(list("xy"), categories=levels,
                                 ordered=ordered)
            wanted_cols = pd.CategoricalIndex(cat.categories,
                                              categories=cat.categories,
                                              ordered=ordered)
            wanted = DataFrame(indicator, columns=wanted_cols)

            tm.assert_frame_equal(get_dummies(cat), wanted)
示例#26
0
    def test_basic_drop_first(self):
        """With ``drop_first=True`` only the ``b`` and ``c`` columns remain."""
        letters = list('abc')
        wanted = DataFrame({'b': {0: 0, 1: 1, 2: 0},
                            'c': {0: 0, 1: 0, 2: 1}}, dtype=np.uint8)

        for data in (letters, Series(letters)):
            encoded = get_dummies(data, sparse=self.sparse, drop_first=True)
            assert_frame_equal(encoded, wanted)

        # A labelled Series must carry its index through to the result.
        labelled = Series(letters, list('ABC'))
        wanted.index = list('ABC')
        encoded = get_dummies(labelled, sparse=self.sparse, drop_first=True)
        assert_frame_equal(encoded, wanted)
示例#27
0
 def test_dataframe_dummies_prefix_dict(self):
     """Supply the prefixes as a column-name to prefix mapping."""
     frame = DataFrame({'A': ['a', 'b', 'a'],
                        'B': ['b', 'b', 'c'],
                        'C': [1, 2, 3]})
     encoded = get_dummies(frame, prefix={'A': 'from_A', 'B': 'from_B'},
                           sparse=self.sparse)

     dummy_cols = ['from_A_a', 'from_A_b', 'from_B_b', 'from_B_c']
     wanted = DataFrame({'from_A_a': [1, 0, 1],
                         'from_A_b': [0, 1, 0],
                         'from_B_b': [1, 1, 0],
                         'from_B_c': [0, 0, 1],
                         'C': [1, 2, 3]})
     wanted[dummy_cols] = wanted[dummy_cols].astype(np.uint8)
     assert_frame_equal(encoded, wanted)
示例#28
0
    def test_dataframe_dummies_preserve_categorical_dtype(self):
        """Categorical input must give CategoricalIndex columns (GH13854).

        Runs for both unordered and ordered categoricals; the unused
        category ``z`` is expected as an all-zero column.
        """
        levels = list("xyz")
        indicator = np.array([[1, 0, 0], [0, 1, 0]], dtype=np.uint8)

        for ordered in (False, True):
            cat = pd.Categorical(list("xy"), categories=levels,
                                 ordered=ordered)
            wanted_cols = pd.CategoricalIndex(cat.categories,
                                              categories=cat.categories,
                                              ordered=ordered)
            wanted = DataFrame(indicator, columns=wanted_cols)

            tm.assert_frame_equal(get_dummies(cat), wanted)
示例#29
0
 def test_dataframe_dummies_prefix_dict(self):
     """Supply the prefixes as a column-name to prefix mapping."""
     frame = DataFrame({'A': ['a', 'b', 'a'],
                        'B': ['b', 'b', 'c'],
                        'C': [1, 2, 3]})
     encoded = get_dummies(frame, prefix={'A': 'from_A', 'B': 'from_B'},
                           sparse=self.sparse)

     dummy_cols = ['from_A_a', 'from_A_b', 'from_B_b', 'from_B_c']
     wanted = DataFrame({'from_A_a': [1, 0, 1],
                         'from_A_b': [0, 1, 0],
                         'from_B_b': [1, 1, 0],
                         'from_B_c': [0, 0, 1],
                         'C': [1, 2, 3]})
     wanted[dummy_cols] = wanted[dummy_cols].astype(np.uint8)
     assert_frame_equal(encoded, wanted)
示例#30
0
    def test_basic_types(self):
        """Check the dtypes produced by ``get_dummies`` (GH 10531).

        List and Series input are compared against a ``uint8`` indicator
        frame; for DataFrame input only the dtype counts are checked.
        """
        letters = list('abc')
        frame = DataFrame({'a': [0, 1, 0, 1, 2],
                           'b': ['A', 'A', 'B', 'C', 'C'],
                           'c': [2, 3, 3, 3, 2]})

        indicator = DataFrame({'a': [1, 0, 0],
                               'b': [0, 1, 0],
                               'c': [0, 0, 1]},
                              dtype='uint8',
                              columns=list('abc'))
        if self.sparse:
            # Sparse output is compared against a sparse expectation.
            indicator = indicator.to_sparse(fill_value=0, kind='integer')
            check = tm.assert_sp_frame_equal
        else:
            check = tm.assert_frame_equal

        for data in (letters, Series(letters)):
            check(get_dummies(data, sparse=self.sparse), indicator)

        # Encoding every column yields eight ``uint8`` indicator columns.
        all_encoded = get_dummies(frame, sparse=self.sparse,
                                  columns=frame.columns)
        tm.assert_series_equal(all_encoded.get_dtype_counts(),
                               Series({'uint8': 8}))

        # Encoding only ``a`` keeps one int64 and one object column.
        partly_encoded = get_dummies(frame, sparse=self.sparse, columns=['a'])
        wanted_counts = Series({'uint8': 3, 'int64': 1, 'object': 1})
        tm.assert_series_equal(
            partly_encoded.get_dtype_counts().sort_values(),
            wanted_counts.sort_values())
示例#31
0
    def test_basic_drop_first_NA(self):
        """Test NA handling together with ``drop_first=True``.

        Covers input with a NaN entry, with and without ``dummy_na``, and
        input consisting of NaN only.
        """
        s_NA = ['a', 'b', np.nan]
        res = get_dummies(s_NA, sparse=self.sparse, drop_first=True)
        exp = DataFrame({'b': {0: 0,
                               1: 1,
                               2: 0}}, dtype=np.uint8)
        assert_frame_equal(res, exp)

        res_na = get_dummies(s_NA, dummy_na=True, sparse=self.sparse,
                             drop_first=True)
        exp_na = DataFrame({'b': {0: 0,
                                  1: 1,
                                  2: 0},
                            nan: {0: 0,
                                  1: 0,
                                  2: 1}}, dtype=np.uint8)
        # ``reindex(columns=...)`` replaces the deprecated
        # ``reindex_axis(labels, 1)`` spelling; the column order is the same.
        exp_na = exp_na.reindex(columns=['b', nan])
        assert_frame_equal(res_na, exp_na)

        # Only NaN in the input: an empty frame with one row is expected.
        res_just_na = get_dummies([nan], dummy_na=True, sparse=self.sparse,
                                  drop_first=True)
        exp_just_na = DataFrame(index=np.arange(1))
        assert_frame_equal(res_just_na, exp_just_na)
示例#32
0
 def test_unicode(self):
     """Encode a non-ASCII value; see GH 6885 (choked on unicode values)."""
     import unicodedata
     plain = 'e'
     accented = unicodedata.lookup('LATIN SMALL LETTER E WITH ACUTE')
     wanted = DataFrame({'letter_e': {0: 1, 1: 0, 2: 0},
                         u('letter_%s') % accented: {0: 0, 1: 1, 2: 1}},
                        dtype=np.uint8)
     encoded = get_dummies([plain, accented, accented], prefix='letter',
                           sparse=self.sparse)
     assert_frame_equal(encoded, wanted)
示例#33
0
    def test_basic_drop_first_NA(self):
        """Test NA handling together with ``drop_first=True``.

        Covers input with a NaN entry, with and without ``dummy_na``, and
        input consisting of NaN only.
        """
        s_NA = ['a', 'b', np.nan]
        res = get_dummies(s_NA, sparse=self.sparse, drop_first=True)
        exp = DataFrame({'b': {0: 0,
                               1: 1,
                               2: 0}}, dtype=np.uint8)
        assert_frame_equal(res, exp)

        res_na = get_dummies(s_NA, dummy_na=True, sparse=self.sparse,
                             drop_first=True)
        exp_na = DataFrame({'b': {0: 0,
                                  1: 1,
                                  2: 0},
                            nan: {0: 0,
                                  1: 0,
                                  2: 1}}, dtype=np.uint8)
        # ``reindex(columns=...)`` replaces the deprecated
        # ``reindex_axis(labels, 1)`` spelling; the column order is the same.
        exp_na = exp_na.reindex(columns=['b', nan])
        assert_frame_equal(res_na, exp_na)

        # Only NaN in the input: an empty frame with one row is expected.
        res_just_na = get_dummies([nan], dummy_na=True, sparse=self.sparse,
                                  drop_first=True)
        exp_just_na = DataFrame(index=np.arange(1))
        assert_frame_equal(res_just_na, exp_just_na)
示例#34
0
 def test_unicode(self):
     """Encode a non-ASCII value; see GH 6885 (choked on unicode values)."""
     import unicodedata
     plain = 'e'
     accented = unicodedata.lookup('LATIN SMALL LETTER E WITH ACUTE')
     wanted = DataFrame({'letter_e': {0: 1, 1: 0, 2: 0},
                         u('letter_%s') % accented: {0: 0, 1: 1, 2: 1}},
                        dtype=np.uint8)
     encoded = get_dummies([plain, accented, accented], prefix='letter',
                           sparse=self.sparse)
     assert_frame_equal(encoded, wanted)
示例#35
0
def make_dummy_columns(mdc_df, column_name, prefix='', append_columns=None):
    """Join dummy columns built from ``mdc_df[column_name]`` onto ``mdc_df``.

    Parameters
    ----------
    mdc_df
        Frame holding the column to encode.
    column_name
        Label of the column passed to ``reshape.get_dummies``.
    prefix : str, default ''
        Forwarded to ``reshape.get_dummies`` as ``prefix``.
    append_columns : list, optional
        Passed to ``arr_append`` together with the labels of the new dummy
        columns (presumably extended in place; ``arr_append`` is defined
        elsewhere, so verify). A fresh list is used when omitted.

    Returns
    -------
    The result of ``mdc_df.join(dummies)``.
    """
    if append_columns is None:
        # A literal ``[]`` default would be one list shared by every call
        # and handed to ``arr_append`` again and again.
        append_columns = []
    dummies = reshape.get_dummies(mdc_df[column_name], prefix=prefix)
    arr_append(append_columns, dummies.columns)
    return mdc_df.join(dummies)
示例#36
0
 def test_dataframe_dummies_prefix_bad_length(self):
     """A one-element ``prefix`` list for the fixture frame must raise."""
     short_prefix = ['too few']
     with pytest.raises(ValueError):
         get_dummies(self.df, prefix=short_prefix, sparse=self.sparse)
示例#37
0
 def test_dataframe_dummies_drop_first(self):
     """Encode fixture columns ``A`` and ``B`` with ``drop_first=True``."""
     subset = self.df[['A', 'B']]
     wanted = DataFrame({'A_b': [0, 1, 0], 'B_c': [0, 0, 1]},
                        dtype=np.uint8)
     encoded = get_dummies(subset, sparse=self.sparse, drop_first=True)
     assert_frame_equal(encoded, wanted)
示例#38
0
 def test_dataframe_dummies_prefix_sep_bad_length(self):
     """A one-element ``prefix_sep`` list for the fixture frame must raise."""
     short_separators = ['bad']
     with pytest.raises(ValueError):
         get_dummies(self.df, prefix_sep=short_separators,
                     sparse=self.sparse)
示例#39
0
 def test_dataframe_dummies_prefix_bad_length(self):
     """A one-element ``prefix`` list for the fixture frame must raise."""
     short_prefix = ['too few']
     with tm.assertRaises(ValueError):
         get_dummies(self.df, prefix=short_prefix, sparse=self.sparse)
示例#40
0
 def test_dataframe_dummies_prefix_sep_bad_length(self):
     """A one-element ``prefix_sep`` list for the fixture frame must raise."""
     short_separators = ['bad']
     with tm.assertRaises(ValueError):
         get_dummies(self.df, prefix_sep=short_separators,
                     sparse=self.sparse)