Пример #1
0
def forward_iv_test():
    """Run ``forwardSplit`` on the ``Age`` column of ``credit_old.csv``.

    Loads the ``Age`` and ``target`` columns, drops rows containing missing
    values, and prints the bin edges obtained from three different ``fit``
    configurations. After the last fit, prints one line per bin with the
    ``(lower, upper)`` edge pair and the result of ``_cal_woe`` applied to the
    entries of ``value`` whose ``x`` lies in ``[lower, upper)``.

    The trailing comments on the ``print`` calls are the outputs recorded by
    the original author.
    """
    data = pd.read_csv('credit_old.csv')[['Age', 'target']].dropna()

    # Fit 1: IV criterion with a minimum-value threshold.
    splitter = forwardSplit(data['Age'], data['target'])
    splitter.fit(sby='iv', minv=0.1, init_split=20)
    print(splitter.bins)
    # [16. 25. 29. 33. 36. 38. 40. 42. 44. 46. 48. 50. 58. 60. 63. 94.]

    # Fit 2: fresh splitter, capped split count and a 20 % minimum sample size.
    splitter = forwardSplit(data['Age'], data['target'])
    splitter.fit(
        sby='iv',
        num_split=4,
        init_split=20,
        min_sample=len(data) * 0.2,
    )
    print(splitter.bins)  # [16. 38. 50. 94.]

    # Fit 3: NOTE(review) this re-fits the splitter from fit 2 instead of
    # building a new one -- confirm that reuse is intended.
    splitter.fit(sby='woeiv', num_split=4, init_split=20)
    print(splitter.bins)  # [16. 25. 33. 36. 38. 94.]

    print("bin\twoe")
    edges = splitter.bins
    # Walk adjacent edge pairs; each bin is half-open: lower <= x < upper.
    for lower, upper in zip(edges[:-1], edges[1:]):
        in_bin = (splitter.x < upper) & (splitter.x >= lower)
        print((lower, upper), splitter._cal_woe(splitter.value[in_bin]))
Пример #2
0
def forward_woe_test_cat():
    """Run ``forwardSplit`` in categorical mode on ``Branch``.

    Loads the ``Branch`` and ``target`` columns of ``credit_old.csv``, drops
    rows containing missing values, fits with the ``'woe'`` criterion, and
    prints the resulting bins. Then prints, for every bin, the bin itself and
    the result of ``_cal_woe`` applied to the entries of ``value`` whose
    ``x_idx`` is a member of that bin.
    """
    data = pd.read_csv('credit_old.csv')[['Branch', 'target']].dropna()

    splitter = forwardSplit(data['Branch'], data['target'], categorical=True)
    splitter.fit(sby='woe', minv=0.01, init_split=0, num_split=4)
    print(splitter.bins)

    for pos in range(len(splitter.bins)):
        group = splitter.bins[pos]
        # Select the rows whose index code belongs to this group of categories.
        members = np.isin(splitter.x_idx, group)
        print(group, splitter._cal_woe(splitter.value[members]))
Пример #3
0
def forward_iv_test2():
    """Run ``forwardSplit`` with the ``'woeiv'`` criterion on ``resolution.csv``.

    Loads the file, drops rows containing missing values, and fits a splitter
    on columns ``x`` and ``y`` (constructed with ``missing=-1``). Prints the
    bin edges, then one line per bin with the ``(lower, upper)`` edge pair and
    the result of ``_cal_woe`` applied to the entries of ``value`` whose ``x``
    lies in ``[lower, upper)``.
    """
    data = pd.read_csv('resolution.csv').dropna()

    splitter = forwardSplit(data['x'], data['y'], missing=-1)
    splitter.fit(sby='woeiv', minv=0.1, init_split=0, num_split=4)
    print(splitter.bins)

    print("bin\twoe")
    edges = splitter.bins
    # Walk adjacent edge pairs; each bin is half-open: lower <= x < upper.
    for lower, upper in zip(edges[:-1], edges[1:]):
        in_bin = (splitter.x < upper) & (splitter.x >= lower)
        print((lower, upper), splitter._cal_woe(splitter.value[in_bin]))