# Example #1
# 0
def run(kf, data, model, label='flag_y'):
    """
    Cross-validate model, report fold-averaged performance and persist the best fold.

    For every split produced by ``kf.split(data)`` the model is fitted on the
    training rows and scored on both partitions. The per-fold metrics are
    averaged and sent to nni; the fold with the highest ``default`` score has
    its fitted model dumped and its train/test rows written to ``train.fea``
    and ``test.fea``.

    :param kf: splitter whose ``split(data)`` yields ``(train_index, test_index)``
        pairs of positional row indices (they are used with ``.iloc``)
    :param data: table holding the features plus the label column; it must support
        ``drop(columns=...)``, ``data[label]`` and ``.iloc``
        (presumably a pandas DataFrame - confirm against the caller)
    :param model: estimator exposing ``fit`` and ``predict_proba``; it is refitted
        in place on every fold, so on return it holds the last fold's fit
    :param label: name of the label column in ``data``
    :return: None; the averaged metrics are logged and reported via
        ``nni.report_final_result``
    """
    # Function-scope import so this block does not depend on the file's import list.
    import copy

    # The feature/label split does not depend on the fold, so build it once.
    X = data.drop(columns=label)
    y = data[label]

    # Per-fold values, keyed in the order the final report uses.
    history = {
        'gini_train': [],
        'gini_test': [],
        'auc_train': [],
        'auc_test': [],
        'ks_train': [],
        'ks_test': [],
        'psi': [],
        'default': [],
    }
    models = []

    # Cross-validation
    kf_list = list(kf.split(data))
    for fold in kf_list:
        # Train
        train_index, test_index = fold[0], fold[1]
        X_train, y_train = X.iloc[train_index], y.iloc[train_index]
        X_test, y_test = X.iloc[test_index], y.iloc[test_index]
        model.fit(X_train, y_train)
        # Column 1 of predict_proba is used as the score
        y_pred = model.predict_proba(X_train)[:, 1]
        y_test_pred = model.predict_proba(X_test)[:, 1]

        # Compute metrics
        ks_train, ks_test = calc_ks(y_pred, y_train), calc_ks(y_test_pred, y_test)
        auc_train, auc_test = calc_auc(y_pred, y_train), calc_auc(y_test_pred, y_test)
        gini_train, gini_test = calc_gini(y_pred, y_train), calc_gini(y_test_pred, y_test)
        psi = calc_psi(y_pred, y_test_pred)
        # Objective: weight the test KS and subtract a penalty for the train/test KS gap.
        default = 1.8 * ks_test - 0.8 * abs(ks_train - ks_test)

        history['gini_train'].append(gini_train)
        history['gini_test'].append(gini_test)
        history['auc_train'].append(auc_train)
        history['auc_test'].append(auc_test)
        history['ks_train'].append(ks_train)
        history['ks_test'].append(ks_test)
        history['psi'].append(psi)
        history['default'].append(default)

        # Snapshot the fitted estimator. Appending ``model`` itself would store the
        # same object for every fold, and the next ``fit`` would overwrite it, so
        # the "best" model would always be the last fold's fit.
        models.append(copy.deepcopy(model))

    # Aggregate results: mean over folds, converted to plain floats
    metrics = {name: float(np.mean(values)) for name, values in history.items()}

    # Dump the model of the best-scoring fold
    best_model_idx = int(np.argmax(history['default']))
    dump_pkl(models[best_model_idx])

    # Write out the train/test rows of that same fold
    train = data.iloc[kf_list[best_model_idx][0]]
    write_data(train, 'train.fea')
    test = data.iloc[kf_list[best_model_idx][1]]
    write_data(test, 'test.fea')

    LOG.debug(metrics)
    nni.report_final_result(metrics)
def run(X_train, X_test, y_train, y_test, model):
    """
    Fit model on the training split, score both splits and report the metrics to nni.

    :param X_train: training features
    :param X_test: test features
    :param y_train: training labels
    :param y_test: test labels
    :param model: estimator exposing ``fit`` and ``predict_proba``; fitted in place
    :return: None; the metrics are logged and sent via ``nni.report_final_result``
    """
    # Fit, then use column 1 of predict_proba as the score for each split
    model.fit(X_train, y_train)
    train_scores = model.predict_proba(X_train)[:, 1]
    test_scores = model.predict_proba(X_test)[:, 1]

    # Evaluate: KS, AUC, Gini (train then test for each), and finally PSI
    ks_train = calc_ks(train_scores, y_train)
    ks_test = calc_ks(test_scores, y_test)
    auc_train = calc_auc(train_scores, y_train)
    auc_test = calc_auc(test_scores, y_test)
    gini_train = calc_gini(train_scores, y_train)
    gini_test = calc_gini(test_scores, y_test)
    psi = calc_psi(train_scores, test_scores)

    # Objective: weight the test KS and subtract a penalty for the train/test KS gap
    objective = 1.8 * ks_test - 0.8 * abs(ks_train - ks_test)

    # Collect everything into the report payload
    metrics = dict(
        gini_train=gini_train,
        gini_test=gini_test,
        auc_train=auc_train,
        auc_test=auc_test,
        ks_train=ks_train,
        ks_test=ks_test,
        psi=psi,
        default=objective,
    )

    dump_pkl(model)
    LOG.debug(metrics)
    nni.report_final_result(metrics)