Exemplo n.º 1
0
def test_card_combiner_str_not_match():
    """Fitting a card must fail when the combiner rules for 'C' were overridden.

    The rules exported from the module-level combiner get a replacement
    grouping for 'C' and are loaded into a fresh Combiner. A ScoreCard built
    on that combiner is expected to raise during ``fit`` with a message that
    names 'C' as not matched.
    """
    rules = combiner.export()
    rules['C'] = [['A'], ['B'], ['C']]
    mismatched = Combiner().load(rules)
    binned = mismatched.transform(df)
    woe_transer = WOETransformer()
    woe = woe_transer.fit_transform(binned, target)

    card = ScoreCard(combiner=mismatched, transer=woe_transer)

    # the exception is expected at fit time, not at construction time
    with pytest.raises(Exception) as excinfo:
        card.fit(woe, target)

    assert '\'C\' is not matched' in str(excinfo.value)
Exemplo n.º 2
0
def test_card_combiner_str_not_match():
    """Creating a card must fail when the combiner rules for 'C' were overridden.

    The rules exported from the module-level combiner get a replacement
    grouping for 'C' and are applied to a fresh Combiner via ``set_rules``.
    With a pre-fitted LogisticRegression supplied, constructing the ScoreCard
    is expected to raise with a message that names 'C' as not matched.
    """
    rules = combiner.export()
    rules['C'] = [['A'], ['B'], ['C']]
    mismatched = Combiner().set_rules(rules)
    binned = mismatched.transform(df)
    woe_transer = WOETransformer()
    woe = woe_transer.fit_transform(binned, target)

    fitted_model = LogisticRegression()
    fitted_model.fit(woe, target)

    # the exception is expected while the card is being constructed
    with pytest.raises(Exception) as excinfo:
        ScoreCard(combiner=mismatched, transer=woe_transer, model=fitted_model)

    assert '\'C\' is not matched' in str(excinfo.value)
Exemplo n.º 3
0
def num_bin(df: pd.DataFrame, cols: list = None, target: str = 'target', specials: list = None,
            bin_num_limit: int = 5, count_distr_limit: float = 0.05, sc_method='chimerge',
            non_mono_cols: list = None, init_bins=10, init_min_samples=0.05, init_method='chi', **kwargs):
    """Bin columns with ``woebin``, seeding monotonic columns with Combiner cut points.

    Pipeline per column: coarse binning, monotonicity check, final binning.
    Columns listed in ``non_mono_cols`` are binned by ``woebin`` directly.
    Every other column is first fitted with a ``Combiner``; its exported cut
    points are passed through ``monotonous_bin`` and the result is given to
    ``woebin`` as ``breaks_list``.

    Args:
        df: frame holding the feature columns and the target column.
        cols: columns to bin; when empty or None, every column of ``df``
            except ``target`` is used.
        target: name of the target column.
        specials: special values; when given, the same list is applied to
            every column in ``cols``.
        bin_num_limit: forwarded to ``woebin``.
        count_distr_limit: forwarded to ``woebin``.
        sc_method: forwarded to ``woebin`` as ``method``.
        non_mono_cols: columns that skip the Combiner / ``monotonous_bin`` step.
        init_bins: forwarded to ``Combiner.fit`` as ``n_bins``.
        init_min_samples: forwarded to ``Combiner.fit`` as ``min_samples``.
        init_method: forwarded to ``Combiner.fit`` as ``method``.
        **kwargs: extra keyword arguments forwarded to ``Combiner.fit``.

    Returns:
        A ``(bind, ivd)`` tuple: ``bind`` maps each column to the entry
        ``woebin`` returned for it, ``ivd`` maps each column to the first
        unique value of that entry's ``'total_iv'`` column.
    """
    if not cols:
        cols = df.columns.difference([target]).tolist()

    if specials:
        specials = {k: specials for k in cols}

    non_mono_cols = non_mono_cols or []

    # Arguments shared by every woebin call, whichever branch a column takes.
    woebin_common = dict(
        dt=df,
        y=target,
        special_values=specials,
        bin_num_limit=bin_num_limit,
        count_distr_limit=count_distr_limit,
        method=sc_method,
        print_info=False,
    )

    bind, ivd = {}, {}
    t0 = time.process_time()

    for col in cols:
        extra = {}
        if col not in non_mono_cols:
            # Coarse binning first, then derive the break points handed to woebin.
            c = Combiner()
            c.fit(X=df[col], y=df[target], n_bins=init_bins, min_samples=init_min_samples,
                  method=init_method, **kwargs)
            init_points = c.export()[col]
            extra['breaks_list'] = monotonous_bin(df=df, col=col, target=target,
                                                  cutOffPoints=init_points, special_values=specials)

        bind[col] = woebin(x=col, **woebin_common, **extra)[col]
        ivd[col] = bind[col]['total_iv'].unique()[0]

    # process_time() counts CPU seconds, so the difference is already in seconds;
    # the former "* 100 / 60" scaling made the printed figure meaningless.
    elapsed = time.process_time() - t0
    print(f'there are bing {len(cols)} using {elapsed:.2f} seconds')
    return bind, ivd
Exemplo n.º 4
0
def test_combiner_frame():
    """Bin the whole frame against the target and spot-check one cell by position."""
    binned = Combiner().fit_transform(df, target)
    cell = binned.iloc[404, 1]
    assert cell == 2
Exemplo n.º 5
0
def test_combiner_unique_feature():
    """Chi-method binning of ``uni_feat`` is expected to give bin 0 at index 451."""
    binned = Combiner().fit_transform(uni_feat, target, method='chi')
    assert binned[451] == 0
Exemplo n.º 6
0
def test_combiner_with_str():
    """Chi-method binning of ``str_feat`` is expected to give bin 0 at index 451."""
    binned = Combiner().fit_transform(str_feat, target, method='chi')
    assert binned[451] == 0
Exemplo n.º 7
0
def test_combiner():
    """Chi-method binning of ``feature`` is expected to give bin 3 at index 451."""
    binned = Combiner().fit_transform(feature, target, method='chi')
    assert binned[451] == 3
Exemplo n.º 8
0
def test_combiner_labels_with_empty():
    """With ``empty_separate=True`` the label at row 2 of 'D' is expected to be '4.nan'."""
    fitted = Combiner().fit(df, 'target', n_bins=4, empty_separate=True)
    labelled = fitted.transform(df, labels=True)
    assert labelled.loc[2, 'D'] == '4.nan'
Exemplo n.º 9
0
def test_combiner_empty_separate():
    """With ``empty_separate=True`` no non-missing value of 'D' may land in bin 4."""
    binned = Combiner().fit_transform(df, 'target', n_bins=4, empty_separate=True)
    present = ~pd.isna(df['D'])
    assert (binned['D'][present] != 4).all()
Exemplo n.º 10
0
def test_combiner_step():
    """Step-method fit on column 'A' with 4 bins: the second exported split is 4.5."""
    fitted = Combiner().fit(df['A'], method='step', n_bins=4)
    rules = fitted.export()
    second_split = rules['A'][1]
    assert second_split == 4.5
Exemplo n.º 11
0
def test_combiner_export():
    """The first exported rule for column 'B' is expected to be a list."""
    fitted = Combiner().fit(df, target, method='chi', n_bins=4)
    rules = fitted.export()
    first_group = rules['B'][0]
    assert isinstance(first_group, list)
Exemplo n.º 12
0
def test_combiner_labels():
    """With ``labels=True`` row 451 of 'A' is expected to carry the label '3.[3 ~ 4)'."""
    fitted = Combiner().fit(df, target)
    labelled = fitted.transform(df, labels=True)
    assert labelled.loc[451, 'A'] == '3.[3 ~ 4)'
Exemplo n.º 13
0
# Split by the 'split' column: rows tagged Q1-Q3 form the training set and
# rows tagged Q4 the test set; the 'split' column is dropped from both.
train = data[data['split'].isin(['Q1', 'Q2', 'Q3'])].drop('split', axis=1)
test = data[data['split'].isin(['Q4'])].drop('split', axis=1)

# Feature selection on the training set with thresholds iv=0.005 and corr=0.8.
# return_drop=True also yields the dropped features, which are read below
# under the 'iv' and 'corr' keys.
train_s, drops = select(train,
                        target='loan_status',
                        iv=0.005,
                        corr=0.8,
                        return_drop=True)
# Restrict the test set to the columns that remain in the selected training set.
test_s = test[train_s.columns]
# Report the features rejected by the IV filter and by the corr filter,
# then the number of remaining columns.
print('IV筛选不通过的特征为:\n', drops['iv'], '\n', 'corr筛选不通过的特征为:\n', drops['corr'])
print('处理完成,剩余{}特征'.format(train_s.shape[1]), '\n' * 2)

# ----------------------------------------------------------------------------------------------------------------------------------------------------
# Binning
# Banner announcing that chi-square binning is in progress.
print('卡方分箱中'.center(60, '—'))
# Module-level Combiner instance; combine() refits it for each feature.
comb = Combiner()
columns = train_s.columns


def combine(data, target, columns=None, exclude=None):
    """Fit, print and plot chi-square bins for each requested feature.

    For every name in ``columns`` that is not in ``exclude``, the module-level
    ``comb`` combiner is refitted on that feature together with the target
    (``method='chi'``, ``min_samples=0.1``). The exported bins are printed and
    the labelled transform is drawn with ``bin_plot`` and shown.

    Args:
        data: frame holding the feature columns and the target column.
        target: name of the target column.
        columns: feature names to process; any list-like or a pandas Index.
            None (the default) means no features.
        exclude: feature names to skip; None (the default) skips nothing.

    Returns:
        None. The function only prints and plots.
    """
    # None defaults avoid shared mutable defaults. Coercing to an Index lets
    # plain lists work with the boolean mask below.
    columns = pd.Index([] if columns is None else columns)
    exclude = [] if exclude is None else exclude

    for feature in columns[~columns.isin(exclude)]:
        pair = pd.concat([data[feature], data[target]], axis=1)
        comb.fit(pair, y=target, method='chi', min_samples=0.1)
        bins = comb.export()
        print(bins)
        labelled = comb.transform(pair, labels=True)
        bin_plot(labelled, x=feature, target=target)
        plt.show()


# combine(train_s, target='loan_status', columns=columns, exclude=['loan_status'])
Exemplo n.º 14
0
        '[5 ~ 8)': 300,
        '[8 ~ inf)': 400,
        'nan': 500,
    },
    'B': {
        ','.join(list('ABCD')): 200,
        ','.join(list('EF')): 400,
        'else': 500,
    },
    'C': {
        'A': 200,
        'B': 100,
    },
}

# Module-level fixtures: bin df against the target with n_bins=5, then
# WOE-transform the binned frame.
combiner = Combiner()
bins = combiner.fit_transform(df, target, n_bins=5)
woe_transer = WOETransformer()
woe = woe_transer.fit_transform(bins, target)

# create a score card
card = ScoreCard(
    combiner=combiner,
    transer=woe_transer,
)
card.fit(woe, target)

# Tolerance passed as the second positional argument of pytest.approx
# (`rel`, the relative tolerance, in pytest's documented signature).
FUZZ_THRESHOLD = 1e-4
TEST_SCORE = pytest.approx(453.58, FUZZ_THRESHOLD)

Exemplo n.º 15
0
def test_combiner_select_dtypes():
    """With ``select_dtypes='number'`` row 451 of 'B' is expected to still read 'G'."""
    transformed = Combiner().fit_transform(df, target, select_dtypes='number')
    assert transformed.loc[451, 'B'] == 'G'
Exemplo n.º 16
0
def test_combiner_exclude():
    """With ``exclude='B'`` row 451 of 'B' is expected to still read 'G'."""
    transformed = Combiner().fit_transform(df, target, exclude='B')
    assert transformed.loc[451, 'B'] == 'G'
Exemplo n.º 17
0
def test_combiner_target_in_frame_kwargs():
    """Target given by column name via the ``y`` keyword: second split of 'A' is 6."""
    fitted = Combiner().fit(df, y='target', n_bins=4)
    rules = fitted.export()
    second_split = rules['A'][1]
    assert second_split == 6