예제 #1
0
    # Load the 'log_flipped' dataset; the three results are unpacked as
    # train features, test features and train labels.
    X_train, X_test, y_train = utils.load_data(data_name='log_flipped')
    # Standardize the features.
    # NOTE(review): the scaler is fitted on train and test rows stacked
    # together, so test-set statistics influence the scaling applied to the
    # training data -- confirm this is intended.
    scaler = StandardScaler()
    scaler.fit(pd.concat((X_train, X_test), axis=0, ignore_index=True))
    # Wrap the transformed values back into DataFrames so the original column
    # names are kept.
    X_train = pd.DataFrame(scaler.transform(X_train), columns=X_train.columns)
    X_test = pd.DataFrame(scaler.transform(X_test), columns=X_test.columns)

    # Splitter handed to CalibratedClassifierCV below; shuffling with a fixed
    # random_state keeps the fold assignment repeatable between runs.
    kf = StratifiedKFold(n_splits=CALIBRATION_CV,
                         shuffle=True,
                         random_state=4891)
    # One model per neighbourhood size 2, 4, ..., 2**MAX_POWER.
    for i in range(1, MAX_POWER + 1):
        clf = KNeighborsClassifier(n_neighbors=2**i, n_jobs=1)
        # Wrap the k-NN classifier in isotonic calibration using the folds
        # defined above.
        clf = CalibratedClassifierCV(clf, method='isotonic', cv=kf)
        # utils.VJUH is a project helper not shown here; presumably it fits
        # `clf` and stores its predictions in the 'ensemble' folder under the
        # name MODEL_NAME + str(i) -- TODO confirm.
        utils.VJUH(X_train,
                   X_test,
                   y_train,
                   clf,
                   MODEL_NAME + str(i),
                   'ensemble',
                   stratify_labels=utils.load_stratify_labels())

    # Read back, for every power i, the data stored under MODEL_NAME + str(i)
    # in the 'ensemble' folder (presumably what utils.VJUH wrote above --
    # verify) and collect the train and test parts separately.
    train = []
    test = []
    for i in range(1, MAX_POWER + 1):
        # The third element returned by load_data is not needed here.
        tr, ts, _ = utils.load_data(data_name=MODEL_NAME + str(i),
                                    data_folder='ensemble')
        train.append(tr)
        test.append(ts)
    # Put the per-model train parts side by side; with axis=1,
    # ignore_index=True relabels the resulting columns 0..n-1.
    result = pd.concat(train, axis=1, ignore_index=True)
    utils.save_result(result.values,
                      MODEL_NAME[:-1],
                      data_folder='ensemble',
예제 #2
0
    # Pick the estimator and the matching eval-metric name for the backend
    # selected by CLASSIFIER: 'xgb' uses XGBClassifier, any other value uses
    # LGBMClassifier.
    if CLASSIFIER == 'xgb':
        metric = 'logloss'
        clf = xgb.XGBClassifier(**PARAMS)
    else:
        metric = 'binary_logloss'
        # Work on a copy so the shared PARAMS mapping itself is not modified.
        lgb_params = PARAMS.copy()
        # Replace the depth setting with num_leaves = 2**max_depth, and remove
        # 'max_depth' and 'gamma' before building the LightGBM estimator.
        depth = lgb_params.pop('max_depth')
        lgb_params['num_leaves'] = 2**depth
        lgb_params.pop('gamma')
        clf = lgb.LGBMClassifier(**lgb_params)

    # Both supported modes pass the same fit-time keyword arguments.
    fit_params = {'eval_metric': metric}

    if MODE not in ('cv', 'ensemble'):
        print('Unsupported mode')
    elif MODE == 'cv':
        # Cross-validation run, labelled "<MODEL_NAME>-<CLASSIFIER>".
        utils.perform_cv(
            X_train,
            y_train,
            clf,
            MODEL_NAME + '-' + CLASSIFIER,
            fit_params=fit_params,
            stratify_labels=utils.load_stratify_labels(),
        )
    else:
        # MODE == 'ensemble': delegate to the project helper utils.VJUH with
        # the 'ensemble' target and ENSEMBLE_RUNS runs.
        utils.VJUH(
            X_train,
            X_test,
            y_train,
            clf,
            MODEL_NAME,
            'ensemble',
            fit_params=fit_params,
            seed_name='seed',
            stratify_labels=utils.load_stratify_labels(),
            runs=ENSEMBLE_RUNS,
        )
예제 #3
0
    # Build the estimator for the backend named by CLASSIFIER.
    # NOTE(review): `metric` is read below but is not assigned in this block;
    # confirm it is defined before this point, otherwise both supported modes
    # raise NameError.
    if CLASSIFIER != 'xgb':
        # LightGBM path: start from a copy of PARAMS, set num_leaves to
        # 2**max_depth, then remove the 'gamma' and 'max_depth' entries.
        lgb_kwargs = PARAMS.copy()
        lgb_kwargs['num_leaves'] = 2**lgb_kwargs['max_depth']
        for dropped_key in ('gamma', 'max_depth'):
            del lgb_kwargs[dropped_key]
        clf = lgb.LGBMClassifier(**lgb_kwargs)
    else:
        clf = xgb.XGBClassifier(**PARAMS)

    if MODE == 'out':
        # Delegate to the project helper utils.VJUH with the 'out' target and
        # RUNS runs.
        utils.VJUH(
            X_train, X_test, y_train, clf, MODEL_NAME, 'out',
            fit_params={'eval_metric': metric},
            seed_name='seed',
            runs=RUNS,
            stratify_labels=utils.load_stratify_labels(),
        )
    elif MODE == 'cv':
        # Cross-validation run, labelled "<MODEL_NAME>-<CLASSIFIER>".
        cv_label = MODEL_NAME + '-' + CLASSIFIER
        utils.perform_cv(
            X_train, y_train, clf, cv_label,
            fit_params={'eval_metric': metric},
            stratify_labels=utils.load_stratify_labels(),
        )
    else:
        print('Unsupported mode')
예제 #4
0
    import warnings

    # Suppress DeprecationWarning messages from here on.
    warnings.filterwarnings("ignore", category=DeprecationWarning)

    X_train, X_test, y_train = utils.load_data('log_flipped')

    # Standardize the features; the scaler is fitted on train and test rows
    # stacked together.
    stacked = pd.concat((X_train, X_test), axis=0, ignore_index=True)
    scaler = StandardScaler().fit(stacked)
    X_train, X_test = (
        pd.DataFrame(scaler.transform(frame), columns=frame.columns)
        for frame in (X_train, X_test)
    )

    # The network is fed float32 feature arrays and int32 class indices.
    X_train, X_test = (
        np.array(frame).astype(np.float32) for frame in (X_train, X_test)
    )
    encoder = LabelEncoder()
    y_train = encoder.fit_transform(y_train).astype(np.int32)

    # Input width and output size are taken from the prepared data.
    num_features = X_train.shape[1]
    num_classes = len(encoder.classes_)

    clf = NeuralNet(
        input_shape=(None, num_features),
        output_num_units=num_classes,
        **PARAMS
    )

    if MODE == 'ensemble':
        # Delegate to the project helper utils.VJUH with the 'ensemble'
        # target, lasagne=True and RUNS runs.
        utils.VJUH(
            X_train, X_test, y_train, clf, MODEL_NAME, 'ensemble',
            lasagne=True,
            runs=RUNS,
            stratify_labels=utils.load_stratify_labels(),
        )
    elif MODE == 'cv':
        utils.perform_cv(
            X_train, y_train, clf, MODEL_NAME,
            stratify_labels=utils.load_stratify_labels(),
        )
    else:
        print('Unsupported mode')