Exemplo n.º 1
0
def num_bin(df:pd.DataFrame,cols:list=None,target:str='target',specials:list=None,
            bin_num_limit:int=5,count_distr_limit:float=0.05,sc_method='chimerge',
            non_mono_cols:list=None,init_bins=10,init_min_samples=0.05,init_method='chi',**kwargs):
    """Bin the requested columns and collect per-column bins and IV.

    Pipeline: coarse binning, monotonicity check, final binning result.
    Columns listed in ``non_mono_cols`` are handed to ``woebin`` directly.
    Every other column is first fitted with a ``Combiner``; its exported
    cut points go through ``monotonous_bin`` and the resulting break list
    is passed to ``woebin``.

    Args:
        df: Frame containing the feature columns and the target column.
        cols: Columns to bin. When empty or None, every column of ``df``
            except ``target`` is used.
        target: Name of the target column.
        specials: Special values. When given, the same object is applied
            to every column in ``cols`` (keyed by column name).
        bin_num_limit: Forwarded to ``woebin``.
        count_distr_limit: Forwarded to ``woebin``.
        sc_method: Forwarded to ``woebin`` as ``method``.
        non_mono_cols: Columns that skip the Combiner/monotonicity step.
        init_bins: Forwarded to ``Combiner.fit`` as ``n_bins``.
        init_min_samples: Forwarded to ``Combiner.fit`` as ``min_samples``.
        init_method: Forwarded to ``Combiner.fit`` as ``method``.
        **kwargs: Extra keyword arguments forwarded to ``Combiner.fit``.

    Returns:
        A ``(bind, ivd)`` tuple. ``bind`` maps each column to the entry
        ``woebin`` returned for it; ``ivd`` maps each column to the first
        unique value of that entry's ``'total_iv'`` column.
    """
    if not cols:
        cols = df.columns.difference([target]).tolist()

    if specials:
        # Apply the same special values to every column being binned.
        specials = {k: specials for k in cols}

    if not non_mono_cols:
        non_mono_cols = []

    bind, ivd = {}, {}
    t0 = time.process_time()

    for col in cols:
        # Arguments shared by both binning paths.
        woebin_args = dict(dt=df, x=col, y=target, special_values=specials,
                           bin_num_limit=bin_num_limit, count_distr_limit=count_distr_limit,
                           method=sc_method, print_info=False)

        if col not in non_mono_cols:
            # Coarse bins first, then adjust the cut points via monotonous_bin.
            c = Combiner()
            c.fit(X=df[col], y=df[target], n_bins=init_bins, min_samples=init_min_samples,
                  method=init_method, **kwargs)
            init_points = c.export()[col]
            # breaks_list is only supplied on this path; the non-monotonic
            # path keeps calling woebin without it, as before.
            woebin_args['breaks_list'] = monotonous_bin(df=df, col=col, target=target,
                                                        cutOffPoints=init_points,
                                                        special_values=specials)

        bind[col] = woebin(**woebin_args)[col]
        ivd[col] = bind[col]['total_iv'].unique()[0]

    # process_time() already returns seconds; the former "* 100 / 60" scaling
    # made the reported figure wrong for a value labelled "seconds".
    elapsed = time.process_time() - t0
    print(f'there are bing {len(cols)} using {int(elapsed)} seconds')
    return bind, ivd
Exemplo n.º 2
0
def test_combiner_target_in_frame_kwargs():
    """Fit with the target passed as the string 'target' through the ``y`` keyword.

    The fit uses four bins on the module-level ``df``; the second exported
    split point for column 'A' is expected to equal 6.
    """
    fitted = Combiner().fit(df, y='target', n_bins=4)
    split_points = fitted.export()
    assert split_points['A'][1] == 6
Exemplo n.º 3
0
def test_combiner_step():
    """Fit column 'A' alone with ``method='step'`` and four bins.

    No target is supplied; the second exported split point for 'A' is
    expected to equal 4.5.
    """
    fitted = Combiner().fit(df['A'], method='step', n_bins=4)
    split_points = fitted.export()
    assert split_points['A'][1] == 4.5
Exemplo n.º 4
0
def test_combiner_export():
    """Check the exported structure for column 'B' after a 'chi' fit.

    Fits on the module-level ``df`` and ``target`` with four bins and
    verifies that the first exported entry for 'B' is a list.
    """
    fitted = Combiner().fit(df, target, method='chi', n_bins=4)
    exported = fitted.export()
    first_entry = exported['B'][0]
    assert isinstance(first_entry, list)