# Script fragment: trains a family of isotonic-calibrated KNN classifiers on
# standardized 'log_flipped' features, then reloads each model's stored output
# from the 'ensemble' folder and stacks the train parts column-wise.
X_train, X_test, y_train = utils.load_data(data_name='log_flipped')

# The scaler is fitted on train and test rows stacked together, then applied to
# each frame separately; column names are restored on the resulting DataFrames.
scaler = StandardScaler()
scaler.fit(pd.concat((X_train, X_test), axis=0, ignore_index=True))
X_train = pd.DataFrame(scaler.transform(X_train), columns=X_train.columns)
X_test = pd.DataFrame(scaler.transform(X_test), columns=X_test.columns)

# Shuffled stratified folds with a fixed seed; passed below as the calibration
# cross-validation splitter.
kf = StratifiedKFold(n_splits=CALIBRATION_CV, shuffle=True, random_state=4891)

# One model per power of two: n_neighbors = 2**1, 2**2, ..., 2**MAX_POWER.
for i in range(1, MAX_POWER + 1):
    clf = KNeighborsClassifier(n_neighbors=2**i, n_jobs=1)
    # Wrap the KNN so its outputs are calibrated with isotonic regression.
    clf = CalibratedClassifierCV(clf, method='isotonic', cv=kf)
    # NOTE(review): utils.VJUH is not visible here; presumably it fits clf and
    # stores its predictions under MODEL_NAME + str(i) in 'ensemble' -- confirm.
    utils.VJUH(X_train, X_test, y_train, clf, MODEL_NAME + str(i), 'ensemble',
               stratify_labels=utils.load_stratify_labels())

# Reload what was stored for every power; the third value returned by
# load_data is ignored.
train = []
test = []
for i in range(1, MAX_POWER + 1):
    tr, ts, _ = utils.load_data(data_name=MODEL_NAME + str(i), data_folder='ensemble')
    train.append(tr)
    test.append(ts)

# Place the per-model train parts side by side (one group of columns per model).
result = pd.concat(train, axis=1, ignore_index=True)
# MODEL_NAME[:-1] drops the last character of MODEL_NAME (presumably a trailing
# separator -- TODO confirm).
# NOTE(review): the call below is cut off in the visible source; its remaining
# arguments and anything that follows it cannot be seen from here.
utils.save_result(result.values, MODEL_NAME[:-1], data_folder='ensemble',
# Script fragment: builds either an XGBoost or a LightGBM classifier from the
# shared PARAMS and runs it in the mode selected by MODE.

# The evaluation metric name and the estimator both depend on CLASSIFIER, so
# they are selected together.
if CLASSIFIER == 'xgb':
    metric = 'logloss'
    clf = xgb.XGBClassifier(**PARAMS)
else:
    metric = 'binary_logloss'
    # Work on a copy so the shared PARAMS mapping stays untouched.
    par = PARAMS.copy()
    # LightGBM is configured through num_leaves = 2 ** max_depth; 'max_depth'
    # and 'gamma' are removed before the parameters reach LGBMClassifier.
    # A missing key raises KeyError.
    depth = par.pop('max_depth')
    par['num_leaves'] = 2 ** depth
    par.pop('gamma')
    clf = lgb.LGBMClassifier(**par)

# Dispatch on the run mode; anything other than 'cv' / 'ensemble' only prints
# a message.
if MODE == 'ensemble':
    utils.VJUH(
        X_train, X_test, y_train, clf, MODEL_NAME, 'ensemble',
        fit_params={'eval_metric': metric},
        seed_name='seed',
        stratify_labels=utils.load_stratify_labels(),
        runs=ENSEMBLE_RUNS,
    )
elif MODE == 'cv':
    utils.perform_cv(
        X_train, y_train, clf, MODEL_NAME + '-' + CLASSIFIER,
        fit_params={'eval_metric': metric},
        stratify_labels=utils.load_stratify_labels(),
    )
else:
    print('Unsupported mode')
# Script fragment: builds either an XGBoost or a LightGBM classifier from the
# shared PARAMS and runs it in the mode selected by MODE.
# `metric` is expected to be defined earlier in the file (not visible here).

if CLASSIFIER != 'xgb':
    # Work on a copy so the shared PARAMS mapping stays untouched.
    par = PARAMS.copy()
    # LightGBM is configured through num_leaves = 2 ** max_depth; 'max_depth'
    # and 'gamma' are removed before the parameters reach LGBMClassifier.
    # A missing key raises KeyError.
    depth = par.pop('max_depth')
    par['num_leaves'] = 2 ** depth
    par.pop('gamma')
    clf = lgb.LGBMClassifier(**par)
else:
    clf = xgb.XGBClassifier(**PARAMS)

# Dispatch on the run mode; anything other than 'cv' / 'out' only prints a
# message.
if MODE == 'out':
    utils.VJUH(
        X_train, X_test, y_train, clf, MODEL_NAME, 'out',
        fit_params={'eval_metric': metric},
        seed_name='seed',
        runs=RUNS,
        stratify_labels=utils.load_stratify_labels(),
    )
elif MODE == 'cv':
    utils.perform_cv(
        X_train, y_train, clf, MODEL_NAME + '-' + CLASSIFIER,
        fit_params={'eval_metric': metric},
        stratify_labels=utils.load_stratify_labels(),
    )
else:
    print('Unsupported mode')
import warnings

# Script fragment: standardizes the 'log_flipped' features, encodes the labels
# as int32 class indices, builds a NeuralNet sized to the data, and runs it in
# the mode selected by MODE.

# Suppress DeprecationWarning messages from here on.
warnings.filterwarnings(action="ignore", category=DeprecationWarning)

X_train, X_test, y_train = utils.load_data('log_flipped')

# The scaler is fitted on train and test rows stacked together, then applied to
# each frame separately; column names are restored on the resulting DataFrames.
scaler = StandardScaler().fit(
    pd.concat((X_train, X_test), axis=0, ignore_index=True)
)
X_train, X_test = (
    pd.DataFrame(scaler.transform(frame), columns=frame.columns)
    for frame in (X_train, X_test)
)

# Map labels to consecutive integer codes and remember how many classes exist.
encoder = LabelEncoder()
encoded_labels = encoder.fit_transform(y_train)
y_train = encoded_labels.astype(np.int32)
num_classes = len(encoder.classes_)

# Convert both feature sets to float32 arrays before they reach the network.
X_train, X_test = (
    np.array(frame).astype(np.float32) for frame in (X_train, X_test)
)
num_features = X_train.shape[1]

# Input width and output size come from the data; the rest comes from PARAMS.
clf = NeuralNet(
    input_shape=(None, num_features),
    output_num_units=num_classes,
    **PARAMS
)

# Dispatch on the run mode; anything other than 'cv' / 'ensemble' only prints
# a message.
if MODE == 'ensemble':
    utils.VJUH(
        X_train, X_test, y_train, clf, MODEL_NAME, 'ensemble',
        lasagne=True,
        runs=RUNS,
        stratify_labels=utils.load_stratify_labels(),
    )
elif MODE == 'cv':
    utils.perform_cv(
        X_train, y_train, clf, MODEL_NAME,
        stratify_labels=utils.load_stratify_labels(),
    )
else:
    print('Unsupported mode')