	def train(self):
		pi = pi_data.piData()
		pi.gen_train_data()
		# Standardize features to zero mean and unit variance
		scaler = StandardScaler()
		scale_x_train = scaler.fit_transform(pi.x_train)

		# Calculate sample weights because the labels are imbalanced:
		# each class gets weight N / count(class), i.e. inverse class frequency
		weight = [len(pi.y_train)/1.0/(len(pi.y_train)-sum(pi.y_train)), len(pi.y_train)/1.0/sum(pi.y_train)]
		train_weights = [weight[j] for j in pi.y_train]
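		# Worked example (illustrative numbers, not from the data): with
		# len(y_train) = 100 and sum(y_train) = 20 positives, the weights are
		# [100/80, 100/20] = [1.25, 5.0], so each positive sample counts four
		# times as much as each negative one.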

		# Fit the final model; earlier candidates are kept below for reference
		#clf = LogisticRegression(verbose=1, C=1, penalty='l1').fit(scale_x_train, pi.y_train)
		#clf = GradientBoostingClassifier(n_estimators=100, max_depth=2, verbose=1).fit(scale_x_train, pi.y_train, sample_weight=train_weights)
		# min_samples_split must be >= 2 in sklearn (a value of 1 is rejected)
		clf = RandomForestClassifier(n_estimators=200, class_weight="balanced", min_samples_split=2).fit(scale_x_train, pi.y_train)
		print "Accuracy of train :" + str(clf.score(scale_x_train, pi.y_train))
		print "F1 score of train :" + str(f1_score(pi.y_train, clf.predict(scale_x_train)))
		print "Precision of train :" + str(precision_score(pi.y_train, clf.predict(scale_x_train)))
		print "Recall of train :" + str(recall_score(pi.y_train, clf.predict(scale_x_train)))
		print "AUC of train :" + str(roc_auc_score(pi.y_train, clf.predict(scale_x_train)))

		#### Feature Importance for linear model
		# features = [line.strip() for line in open("selected_pi_feature.txt")]
		# print "Feature importance :"
		# for i in abs(clf.coef_[0]).argsort()[::-1]:
		# 	print features[i], clf.coef_[0][i]

		# Feature names: each base feature appears twice, suffixed "_p" and "_n"
		base_features = [line.strip() for line in open("selected_pi_feature.txt")]
		features = [name + "_p" for name in base_features] + [name + "_n" for name in base_features]
		print "Feature importance :"
		f = []
		# feature_importances_ is already non-negative, so sort it directly
		for i in clf.feature_importances_.argsort()[::-1]:
			print features[i], clf.feature_importances_[i]
			f.append(i)
		print f
		# Persist the model and scaler; pickle files must be opened in binary mode
		pickle.dump(clf, open("pi_model.pkl", "wb"))
		pickle.dump(scaler, open("pi_scaler.pkl", "wb"))
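
	# Hedged sketch (not part of the original pipeline): how the pickled model
	# and scaler could be reloaded for scoring. The method name and the x_new
	# argument are invented for illustration; x_new is assumed to be a 2-D
	# array with the same column layout as pi.x_train.
	def predict_sketch(self, x_new):
		clf = pickle.load(open("pi_model.pkl", "rb"))
		scaler = pickle.load(open("pi_scaler.pkl", "rb"))
		return clf.predict(scaler.transform(x_new))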
	def validation(self):
		# Five folds, each given as (start, end) fractions of the data
		validation_set = [(0, 0.2), (0.2, 0.4), (0.4, 0.6), (0.6, 0.8), (0.8, 1.0)]
		validation_result = [[] for _ in xrange(len(validation_set))]
		f1 = [[] for _ in xrange(len(validation_set))]
		precision = [[] for _ in xrange(len(validation_set))]
		recall = [[] for _ in xrange(len(validation_set))]
		parameters = []
		for i, v in enumerate(validation_set):
			print "Validation " + str(i) + " : "
			pi = pi_data.piData()
			pi.gen_validation_data_np(v[0], v[1])
			scaler = StandardScaler()
			#scale_x_train = scaler.fit_transform(self.feature_selection(pi.x_train))
			#scale_x_test = scaler.transform(self.feature_selection(pi.x_test))
			scale_x_train = scaler.fit_transform(pi.x_train)
			# Fit the scaler on the training fold only; refitting it on the test
			# fold would leak test statistics into the preprocessing
			scale_x_test = scaler.transform(pi.x_test)

			# Calculate sample weights because the labels are imbalanced
			# (inverse class frequency; used by the sample-weighted models below)
			weight = [len(pi.y_test)/1.0/(len(pi.y_test)-sum(pi.y_test)), len(pi.y_test)/1.0/sum(pi.y_test)]
			test_weights = [weight[j] for j in pi.y_test]
			weight = [len(pi.y_train)/1.0/(len(pi.y_train)-sum(pi.y_train)), len(pi.y_train)/1.0/sum(pi.y_train)]
			train_weights = [weight[j] for j in pi.y_train]

			""" For SVC 
			for c in [100]:#np.logspace(-2, 1, 4):
				for g in [1]:#np.logspace(0, 1, 4):
					# Pass gamma=g so the printed gamma matches what the model uses
					clf = svm.SVC(class_weight='balanced', verbose=1, C=c, gamma=g, degree=2).fit(scale_x_train, pi.y_train)
					validation_result[i].append(clf.score(scale_x_test, pi.y_test))
					f1[i].append(f1_score(pi.y_test, clf.predict(scale_x_test)))
					precision[i].append(precision_score(pi.y_test, clf.predict(scale_x_test)))
					recall[i].append(recall_score(pi.y_test, clf.predict(scale_x_test)))
					print "Accuracy of validation " + str(i+1) + ", C = " + str(c) + ", gamma = " + str(g) + " :" + str(clf.score(scale_x_test, pi.y_test))     

					# Write the parameter to array
					if i == 0:
						parameters.append((c, g))

					print "F1 score :" + str(f1_score(pi.y_test, clf.predict(scale_x_test)))
					print "Precision :" + str(precision_score(pi.y_test, clf.predict(scale_x_test)))
					print "Recall :" + str(recall_score(pi.y_test, clf.predict(scale_x_test)))
			"""

			"""For Linear SVC 
			for c in np.logspace(-2, 2, 3):
				clf = svm.LinearSVC(class_weight='balanced', verbose=1, C=c).fit(scale_x_train, pi.y_train)
				validation_result[i].append(clf.score(scale_x_test, pi.y_test))
				f1[i].append(f1_score(pi.y_test, clf.predict(scale_x_test)))
				precision[i].append(precision_score(pi.y_test, clf.predict(scale_x_test)))
				recall[i].append(recall_score(pi.y_test, clf.predict(scale_x_test)))
				print "Accuracy of validation " + str(i+1) + ", C = " + str(c) + " :" + str(clf.score(scale_x_test, pi.y_test))     

				# Write the parameter to array
				if i == 0:
					parameters.append(c)

				print "F1 score :" + str(f1_score(pi.y_test, clf.predict(scale_x_test)))
				print "Precision :" + str(precision_score(pi.y_test, clf.predict(scale_x_test)))
				print "Recall :" + str(recall_score(pi.y_test, clf.predict(scale_x_test)))
			"""

			"""For Gradient Boosted Classifier 
			for d in [7]:#range(1, 8, 2):
				for f in [1.0]:#[1.0, 0.75, 0.5, 0.25]:
					# Pass max_features=f so the printed "Max Features" matches the model
					clf = GradientBoostingClassifier(n_estimators=400, max_depth=d, max_features=f, verbose=1).fit(scale_x_train, pi.y_train, sample_weight=train_weights)
					validation_result[i].append(clf.score(scale_x_test, pi.y_test))
					f1[i].append(f1_score(pi.y_test, clf.predict(scale_x_test)))
					precision[i].append(precision_score(pi.y_test, clf.predict(scale_x_test)))
					recall[i].append(recall_score(pi.y_test, clf.predict(scale_x_test)))
					print "Accuracy of validation " + str(i+1) + ", Max Features = " + str(f) + ", Max Depth = " + str(d) + " :" + str(clf.score(scale_x_test, pi.y_test))     

					# Write the parameter to array
					if i == 0:
						parameters.append(d)

					print "F1 score :" + str(f1_score(pi.y_test, clf.predict(scale_x_test)))
					print "Precision :" + str(precision_score(pi.y_test, clf.predict(scale_x_test)))
					print "Recall :" + str(recall_score(pi.y_test, clf.predict(scale_x_test)))
			"""

			"""For Random Forest Classifier 
			for d in range(3, 8, 2):  # min_samples_split must be >= 2 in sklearn
				for f in [1.0]:#[1.0, 0.75, 0.5, 0.25]:
					# Pass max_features=f so the printed "Max Features" matches the model
					clf = RandomForestClassifier(n_estimators=200, class_weight="balanced", min_samples_split=d, max_features=f, verbose=1).fit(scale_x_train, pi.y_train)
					validation_result[i].append(clf.score(scale_x_test, pi.y_test))
					f1[i].append(f1_score(pi.y_test, clf.predict(scale_x_test)))
					precision[i].append(precision_score(pi.y_test, clf.predict(scale_x_test)))
					recall[i].append(recall_score(pi.y_test, clf.predict(scale_x_test)))
					print "Accuracy of validation " + str(i+1) + ", Max Features = " + str(f) + ", Minimum Sample Split = " + str(d) + " :" + str(clf.score(scale_x_test, pi.y_test))     

					# Write the parameter to array
					if i == 0:
						parameters.append(d)

					print "F1 score :" + str(f1_score(pi.y_test, clf.predict(scale_x_test)))
					print "Precision :" + str(precision_score(pi.y_test, clf.predict(scale_x_test)))
					print "Recall :" + str(recall_score(pi.y_test, clf.predict(scale_x_test)))
			"""
			# For Logistic Regression
			# np.logspace(-2, 2, 3) -> C in {0.01, 1.0, 100.0}
			for c in np.logspace(-2, 2, 3):
				# solver='liblinear' supports the L1 penalty (the default lbfgs
				# solver in sklearn >= 0.22 does not)
				clf = LogisticRegression(class_weight='balanced', penalty='l1', solver='liblinear', C=c).fit(scale_x_train, pi.y_train)
				# Predict once and reuse the predictions for every metric
				test_pred = clf.predict(scale_x_test)
				validation_result[i].append(clf.score(scale_x_test, pi.y_test))
				f1[i].append(f1_score(pi.y_test, test_pred))
				precision[i].append(precision_score(pi.y_test, test_pred))
				recall[i].append(recall_score(pi.y_test, test_pred))
				print "Accuracy of validation " + str(i+1) + ", C = " + str(c) + " :" + str(clf.score(scale_x_test, pi.y_test))

				# Record the parameter grid once, on the first fold
				if i == 0:
					parameters.append(c)

				print "F1 score :" + str(f1_score(pi.y_test, test_pred))
				print "Precision :" + str(precision_score(pi.y_test, test_pred))
				print "Recall :" + str(recall_score(pi.y_test, test_pred))
			
			print "======================================="

		# Average each metric across the five folds, per grid point
		validation_mean = np.mean(np.array(validation_result), axis=0)
		f1_mean = np.mean(np.array(f1), axis=0)
		precision_mean = np.mean(np.array(precision), axis=0)
		recall_mean = np.mean(np.array(recall), axis=0)
		# One parameter per grid point
		print "Best parameter : " + " parameter1 = " + str(parameters[validation_mean.argmax()])
		# Two parameters per grid point (e.g. the SVC grid above)
		#print "Best parameter : " + " parameter1 = " + str(parameters[validation_mean.argmax()][0]) + " , parameter2 = " + str(parameters[validation_mean.argmax()][1])
		print "with cross-validation accuracy: " + str(validation_mean.max())
		print "with cross-validation f1-score: " + str(f1_mean[validation_mean.argmax()])
		print "with cross-validation precision: " + str(precision_mean[validation_mean.argmax()])
		print "with cross-validation recall: " + str(recall_mean[validation_mean.argmax()])