def forward_iv_test():
    """Exercise forwardSplit on the 'Age' column of credit_old.csv.

    Loads 'credit_old.csv', keeps the 'Age' and 'target' columns, drops rows
    containing missing values, and fits three configurations in turn,
    printing ``bins`` after each fit:

    1. ``sby='iv'`` with ``minv=0.1`` and ``init_split=20``;
    2. ``sby='iv'`` with ``num_split=4``, ``init_split=20`` and a
       ``min_sample`` of 20% of the remaining row count (fresh splitter);
    3. ``sby='woeiv'`` with ``num_split=4`` and ``init_split=20``, re-fitting
       the splitter from step 2.

    Finally prints, for each pair of consecutive bin edges, the edge pair and
    the result of ``_cal_woe`` on the matching slice of ``t.value``.

    Prints to stdout only; returns None.
    """
    frame = pd.read_csv('credit_old.csv')[['Age', 'target']].dropna()

    # Configuration 1: IV-driven split with a minimum-value threshold.
    splitter = forwardSplit(frame['Age'], frame['target'])
    splitter.fit(sby='iv', minv=0.1, init_split=20)
    # Example output recorded by the original author:
    # [16. 25. 29. 33. 36. 38. 40. 42. 44. 46. 48. 50. 58. 60. 63. 94.]
    print(splitter.bins)

    # Configuration 2: new splitter, capped split count and a per-bin floor.
    splitter = forwardSplit(frame['Age'], frame['target'])
    sample_floor = len(frame) * 0.2
    splitter.fit(sby='iv', num_split=4, init_split=20, min_sample=sample_floor)
    # Example output recorded by the original author: [16. 38. 50. 94.]
    print(splitter.bins)

    # Configuration 3: re-fit the same splitter object with 'woeiv'.
    splitter.fit(sby='woeiv', num_split=4, init_split=20)
    # Example output recorded by the original author: [16. 25. 33. 36. 38. 94.]
    print(splitter.bins)

    print("bin\twoe")
    edges = splitter.bins
    # Walk consecutive edge pairs; each bin is selected as the half-open
    # interval [lower, upper).
    # NOTE(review): whether the largest observation lands in the last bin
    # depends on how forwardSplit chooses the final edge -- confirm.
    for lower, upper in zip(edges[:-1], edges[1:]):
        below_upper = splitter.x < upper
        at_or_above_lower = splitter.x >= lower
        members = splitter.value[below_upper & at_or_above_lower]
        print((lower, upper), splitter._cal_woe(members))
def forward_woe_test_cat():
    """Exercise forwardSplit in categorical mode on the 'Branch' column.

    Loads 'credit_old.csv', keeps the 'Branch' and 'target' columns, drops
    rows containing missing values, and fits a splitter constructed with
    ``categorical=True`` using ``sby='woe'``, ``minv=0.01``, ``init_split=0``
    and ``num_split=4``. Prints the resulting ``bins``, then for every entry
    of ``bins`` prints that entry together with the result of ``_cal_woe`` on
    the rows of ``t.value`` whose ``x_idx`` is contained in the entry.

    Prints to stdout only; returns None.
    """
    frame = pd.read_csv('credit_old.csv')[['Branch', 'target']].dropna()

    splitter = forwardSplit(frame['Branch'], frame['target'], categorical=True)
    splitter.fit(sby='woe', minv=0.01, init_split=0, num_split=4)
    print(splitter.bins)

    # Each entry of `bins` is used as the membership set for np.isin.
    # NOTE(review): presumably each entry is a group of category indices --
    # confirm against forwardSplit.
    for group in splitter.bins:
        in_group = np.isin(splitter.x_idx, group)
        members = splitter.value[in_group]
        print(group, splitter._cal_woe(members))
def forward_iv_test2():
    """Exercise forwardSplit with ``sby='woeiv'`` on resolution.csv.

    Loads 'resolution.csv', drops rows containing missing values, and fits a
    splitter built from columns 'x' and 'y' with ``missing=-1`` using
    ``sby='woeiv'``, ``minv=0.1``, ``init_split=0`` and ``num_split=4``.
    Prints the resulting ``bins``, a "bin<TAB>woe" header, and then for each
    pair of consecutive bin edges the edge pair and the result of
    ``_cal_woe`` on the matching slice of ``t.value``.

    Prints to stdout only; returns None.
    """
    frame = pd.read_csv('resolution.csv').dropna()

    splitter = forwardSplit(frame['x'], frame['y'], missing=-1)
    splitter.fit(sby='woeiv', minv=0.1, init_split=0, num_split=4)
    print(splitter.bins)

    print("bin\twoe")
    edges = splitter.bins
    # Walk consecutive edge pairs; each bin is selected as the half-open
    # interval [lower, upper).
    # NOTE(review): whether the largest observation lands in the last bin
    # depends on how forwardSplit chooses the final edge -- confirm.
    for lower, upper in zip(edges[:-1], edges[1:]):
        below_upper = splitter.x < upper
        at_or_above_lower = splitter.x >= lower
        members = splitter.value[below_upper & at_or_above_lower]
        print((lower, upper), splitter._cal_woe(members))