def model_xgboost(data):
    """Fit a default-configured XGBoost classifier and score it.

    Trains on data['X_train'] / data['y_train'] and returns the balanced
    error rate (via src.ber) of its predictions on the validation split.
    """
    clf = XGBClassifier()
    clf.fit(data['X_train'], data['y_train'])
    predictions = clf.predict(data['X_valid'])
    return src.ber(y=data['y_valid'].tolist(), y_hat=predictions.tolist())
def do_predict(idx, params):
    """Predict the validation split with a cached fitted model and save metrics.

    Loads the fitted model identified by params['hash_id'] and the reduced
    data set identified by params['data_hash_id'] from the cache, scores the
    validation split (BER, ROC AUC, F1) and writes a one-row CSV to
    cache/predict_ber/<hash_id>.csv.  Silently does nothing when the model
    file is absent.

    Parameters
    ----------
    idx : hashable
        Index label for the single-row metrics DataFrame.
    params : dict
        Must contain 'hash_id' (fitted-model id) and 'data_hash_id'
        (reduced-data id).
    """
    model_path = os.path.join('cache', 'models_fitted', params['hash_id'] + '.pkl')
    if os.path.isfile(model_path):
        # Fix: use context managers so the pickle file handles are closed
        # deterministically (the original `pickle.load(open(...))` left them
        # open until garbage collection).
        with open(model_path, 'rb') as fh:
            model_fitted = pickle.load(fh)
        data_path = os.path.join('cache', 'data_reduced',
                                 params['data_hash_id'] + '.pkl')
        with open(data_path, 'rb') as fh:
            data = pickle.load(fh)
        # Predict the validation set.
        y_valid_hat = model_fitted.predict(data['X_valid'])
        y_valid = data['y_valid'].tolist()
        ber = src.ber(y=y_valid, y_hat=y_valid_hat.tolist())
        # NOTE(review): ROC AUC is fed hard class labels as `y_score`, not
        # probabilities/decision scores — confirm this is intended.
        roc_auc = roc_auc_score(y_true=y_valid, y_score=y_valid_hat.tolist())
        f1 = f1_score(y_true=y_valid, y_pred=y_valid_hat.tolist())
        predict_ber = pd.DataFrame(
            {'hash_id': params['hash_id'],
             'BER': ber,
             'ROC_AUC': roc_auc,
             'F1': f1},
            index=[idx])
        predict_ber.to_csv(
            os.path.join('cache', 'predict_ber', params['hash_id'] + '.csv'))
        print("Predicted BER: " + str(ber))
def model_svm(data):
    """Screen SVM hyper-parameters with 10-fold cross-validated BER.

    Builds a grid of C / gamma / degree / kernel settings, evaluates the
    first 20 rows with 10-fold cross-validation on the training split, and
    returns the lowest mean CV BER found.

    Parameters
    ----------
    data : dict
        Must contain 'X_train' and 'y_train', both indexable by integer
        index arrays (e.g. numpy arrays).

    Returns
    -------
    float
        Best (lowest) mean cross-validated BER over the evaluated rows.
    """
    X = data['X_train']
    y = data['y_train']
    print(X.shape)
    # NOTE(review): this feature-selection result was never used downstream
    # in the original code (CV below runs on the full X); it is kept only
    # for its diagnostic print — confirm whether X_new should replace X.
    X_new = SelectPercentile(mutual_info_classif, percentile=10).fit_transform(X, y)
    print(X_new.shape)

    # Log-spaced C and gamma grids, polynomial kernel only.
    c = [2 ** float(ci) for ci in range(-5, 11, 2)]
    g = [2 ** float(gi) for gi in range(-11, 9, 2)]
    degree = list(range(0, 6))
    kernel = ['poly']  # , 'sigmoid', 'rbf']
    params = {'c': c, 'g': g, 'degree': degree, 'kernel': kernel}
    grid = pd.DataFrame(src.expand_grid(**params))
    grid['BERcv'] = 1.0

    if False:  # optionally randomize which grid rows get evaluated
        from sklearn.utils import shuffle
        grid = shuffle(grid)

    # Cheap screening pass: only the first 20 grid rows are evaluated.
    for idx, row in grid[0:20].iterrows():
        model = svm.SVC(gamma=row['g'], C=row['c'],
                        kernel=row['kernel'], degree=row['degree'])
        fold_bers = []
        kf = KFold(n_splits=10, random_state=1986, shuffle=True)
        for train_index, test_index in kf.split(X):
            X_train, X_test = X[train_index], X[test_index]
            y_train, y_test = y[train_index], y[test_index]
            model.fit(X_train, y_train)
            y_test_hat = model.predict(X_test)
            fold_bers.append(src.ber(y=y_test.tolist(),
                                     y_hat=y_test_hat.tolist()))
        grid.loc[idx, 'BERcv'] = np.mean(fold_bers)

    best = grid[grid['BERcv'] == grid['BERcv'].min()]
    # Bug fix: the original built a final SVC from the stale loop variable
    # `row['kernel']`, never fitted or used it, and then returned the
    # undefined name `BER` (a guaranteed NameError).  Return the best mean
    # cross-validated BER instead.
    return float(best['BERcv'].iloc[0])
def benchmark_pca_logistic(data):
    """Benchmark: 5-component PCA followed by logistic regression.

    Fits PCA on the training split, projects both splits, trains a
    one-vs-rest logistic regression (SAG solver), and returns the BER of
    its predictions on the validation split.
    """
    pca = PCA(n_components=5).fit(data['X_train'])
    train_features = pca.transform(data['X_train'])
    valid_features = pca.transform(data['X_valid'])
    # Plain logistic regression on the PCA-reduced features.
    classifier = LogisticRegression(
        random_state=0, solver='sag', multi_class='ovr',
        max_iter=1000).fit(train_features, data['y_train'])
    predicted = classifier.predict(valid_features)
    return src.ber(y=data['y_valid'].tolist(), y_hat=predicted.tolist())
def model_lightgbm(data):
    """Fit a default-configured LightGBM classifier and score it.

    Trains on data['X_train'] / data['y_train'] and returns the balanced
    error rate (via src.ber) of its predictions on the validation split.
    """
    clf = lgbm.LGBMClassifier()
    clf.fit(data['X_train'], data['y_train'])
    predictions = clf.predict(data['X_valid'])
    return src.ber(y=data['y_valid'].tolist(), y_hat=predictions.tolist())
def benchmark_all_neg1(data):
    """Baseline: predict the constant -1 for every validation sample.

    Returns the BER of this trivial all-negative predictor, which gives a
    floor for comparing real models against.
    """
    targets = data['y_valid'].tolist()
    constant_predictions = [-1] * len(targets)
    return src.ber(y=targets, y_hat=constant_predictions)