def main():
    """Train, evaluate and save an SVM classifier selected by grid search.

    Steps, in order:
      1. Load the data set through ``getdata()``.
      2. Standardize every feature column (zero mean, unit variance).
      3. Project onto 16 whitened principal components.
      4. Hold out 25% of the samples as a test set.
      5. Grid-search RBF and linear ``SVC`` hyper-parameters with 3-fold CV.
      6. Print per-candidate CV scores, a classification report and the
         ``s.bidirectional_auc`` score on the held-out set.
      7. Persist the fitted search object through ``data_io.save_model``.

    Takes no arguments and returns ``None``; all results are printed or saved.
    """
    print("Loading train data")
    total, total_target = getdata()

    print("Normalizing data")
    mu = np.mean(total, axis=0)
    # Scale by the standard deviation (the original divided by the mean).
    sigma = np.std(total, axis=0)
    # Constant columns have zero spread; divide by 1 instead to avoid NaN/inf.
    sigma = np.where(sigma == 0, 1.0, sigma)
    X_norm = (total - mu) / sigma

    print("PCA")
    pca = PCA(n_components=16, whiten=True)
    pca.fit(X_norm)
    X_pca = pca.transform(X_norm)

    print("Split train")
    X_train, X_test, y_train, y_test = train_test_split(
        X_pca, total_target, test_size=0.25, random_state=0)

    tuned_parameters = [
        {'kernel': ['rbf'], 'gamma': [1e-3, 1e-4], 'C': [1, 10, 100, 1000]},
        {'kernel': ['linear'], 'C': [1, 10, 100, 1000]},
    ]

    print("GridSearchCV")
    # NOTE(review): `score_func=` and the `cv_scores_` attribute used below
    # look like an older scikit-learn GridSearchCV API -- confirm they match
    # the installed version before upgrading the library.
    clf = GridSearchCV(SVC(C=1), tuned_parameters, cv=3,
                       score_func=s.bidirectional_auc, n_jobs=-1, verbose=3)
    clf.fit(X_train, y_train)

    print("Best parameters")
    print(clf.best_estimator_)
    for params, mean_score, scores in clf.cv_scores_:
        # Three conversion specifiers for three arguments (was "$0.03f").
        print("%0.3f (+/-%0.03f) for %r"
              % (mean_score, scores.std() / 2, params))

    # Evaluate on the held-out split, which the grid search never saw.
    y_true, y_pred = y_test, clf.predict(X_test)
    print(classification_report(y_true, y_pred))
    print(s.bidirectional_auc(y_true, y_pred))

    print("Saving the classifier")
    data_io.save_model(clf)