Пример #1
0
def main():
	"""Train, evaluate and save an SVM classifier chosen by grid search.

	Steps, in order:
	  1. Load the samples and targets with ``getdata()``.
	  2. Standardize every feature column (zero mean, unit variance).
	  3. Project onto 16 whitened PCA components.
	  4. Hold out 25% of the samples as a test set (fixed random seed).
	  5. Grid-search SVC hyper-parameters with 3-fold cross-validation,
	     scored by ``s.bidirectional_auc``.
	  6. Print the best estimator, the per-candidate CV scores, and a
	     report on the held-out set.
	  7. Persist the fitted search object with ``data_io.save_model``.
	"""
	print("Loading train data")
	total, total_target = getdata()

	print("Normalizing data")
	# Assumes `total` is a 2-D (samples, features) array -- TODO confirm.
	mu = np.mean(total, axis=0)
	# Scale by the standard deviation; dividing by the mean (as the code
	# previously did) does not standardize the features.
	sigma = np.std(total, axis=0)
	# A constant column has sigma == 0; divide by 1 instead so it becomes
	# all zeros rather than NaN/inf.
	sigma = np.where(sigma == 0, 1.0, sigma)
	X_norm = (total - mu) / sigma

	print("PCA")
	# NOTE(review): scaling and PCA are fitted on the full data set before
	# the train/test split below, so the held-out samples influence the
	# preprocessing -- confirm this is intended.
	pca = PCA(n_components=16, whiten=True)
	pca.fit(X_norm)
	X_pca = pca.transform(X_norm)

	print("Split train")
	X_train, X_test, y_train, y_test = train_test_split(
		X_pca, total_target, test_size=0.25, random_state=0)
	tuned_parameters = [
		{'kernel': ['rbf'], 'gamma': [1e-3, 1e-4],
			'C': [1, 10, 100, 1000]},
		{'kernel': ['linear'], 'C': [1, 10, 100, 1000]},
	]

	print("GridSearchCV")
	# NOTE(review): `score_func` and `cv_scores_` look like an old
	# scikit-learn API; verify them against the installed version.
	clf = GridSearchCV(SVC(C=1), tuned_parameters, cv=3,
		score_func=s.bidirectional_auc, n_jobs=-1, verbose=3)
	clf.fit(X_train, y_train)

	print("Best parameters")
	print(clf.best_estimator_)

	for params, mean_score, scores in clf.cv_scores_:
		# Three conversion specifiers for three values ("$0.03f" was a typo
		# that made the %-formatting raise TypeError).
		print("%0.3f (+/-%0.03f) for %r"
			% (mean_score, scores.std() / 2, params))

	# Evaluate the refitted best estimator on the held-out split.
	y_true, y_pred = y_test, clf.predict(X_test)
	print(classification_report(y_true, y_pred))
	print(s.bidirectional_auc(y_true, y_pred))

	print("Saving the classifier")
	data_io.save_model(clf)