def test_encode_nans(self, X, y, expected, columns):
     """Check the encoded output and the fitted mapping for a single column.

     ``X`` is loaded into a one-column frame named ``'cat'`` and ``y`` into a
     Series; the result of ``fit_transform`` must match ``expected`` to three
     decimals.  The fitted ``_mapping['cat']`` must be a DataFrame whose first
     index entry is ``-99999`` (presumably the encoder's placeholder for
     missing values, judging by the test name -- confirm against the
     encoder), whose remaining index entries equal ``columns``, and whose
     columns are ``pos``, ``count``, ``neg`` and ``value``.
     """
     encoder = WeightOfEvidenceEncoder(cols=['cat'])
     frame = pd.DataFrame(X, columns=['cat'])
     target = pd.Series(y)
     encoded = encoder.fit_transform(frame, target)
     assert_array_almost_equal(encoded, pd.DataFrame(expected), decimal=3)

     # Membership is asserted before the lookup so that a missing key is
     # reported as an assertion failure, not as a KeyError.
     ok_('cat' in encoder._mapping)
     cat_mapping = encoder._mapping['cat']
     ok_(isinstance(cat_mapping, pd.DataFrame))

     mapping_index = cat_mapping.index
     eq_(mapping_index[0], -99999)
     assert_array_equal(mapping_index[1:], columns)
     assert_array_equal(cat_mapping.columns, ['pos', 'count', 'neg', 'value'])
 def test_encode_multiple_cols(self, X, y, expected):
     """Check the encoded output and the fitted mappings for two columns.

     ``X`` is loaded into a frame with columns ``'cat1'`` and ``'cat2'`` and
     ``y`` into a Series; the result of ``fit_transform`` must match
     ``expected`` to two decimals.  Each column must have a DataFrame entry
     in ``_mapping`` with the expected index (``a``/``b`` for ``cat1``,
     ``bar``/``foo`` for ``cat2``) and the columns ``pos``, ``count``,
     ``neg`` and ``value``.
     """
     col_names = ('cat1', 'cat2')
     encoder = WeightOfEvidenceEncoder(cols=list(col_names))
     frame = pd.DataFrame(X, columns=list(col_names))
     encoded = encoder.fit_transform(frame, pd.Series(y))
     assert_array_almost_equal(encoded, pd.DataFrame(expected), decimal=2)

     mappings = encoder._mapping
     # Each kind of check runs for both columns before the next kind starts.
     for name in col_names:
         ok_(name in mappings)
     for name in col_names:
         ok_(isinstance(mappings[name], pd.DataFrame))

     assert_array_equal(mappings['cat1'].index, ['a', 'b'])
     assert_array_equal(mappings['cat2'].index, ['bar', 'foo'])

     for name in col_names:
         assert_array_equal(mappings[name].columns,
                            ['pos', 'count', 'neg', 'value'])
Пример #3
0
                               'addr_state'])

# Build the binary target: 1 when loan_status is one of the statuses listed
# below, 0 for every other status.
dumy_df = pd.DataFrame()
dumy_df['Default_Binary'] = df['loan_status'].isin([
    'Default',
    'Charged Off',
    'Late (31-120 days)',
    'Does not meet the credit policy. Status:Charged Off',
]).astype(int)
y = pd.Series(dumy_df['Default_Binary'])

# Run fit_transform on df_woe against that target, encoding the three
# categorical columns named in cols.
encoder = WeightOfEvidenceEncoder(
    cols=['sub_grade', 'zip_code', 'addr_state'],
)
df_woe_1 = encoder.fit_transform(df_woe, y)

# Creating encoded data dataset: encoded_df_2 and df_woe_1 are placed side by
# side (axis=1) with an outer join and without sorting.
frames = [encoded_df_2, df_woe_1]
encoded_data = pd.concat(frames, axis=1, join='outer', copy=False, sort=False)

# Extracting used columns names
frames = [df_encoder,
          df_woe]
encoded_col = pd.concat(objs=frames,
                        axis=1,