# Compare a few baseline classifiers on the training data and print the mean
# cross-validation score of each one.
from sklearn import neighbors, datasets
from sklearn import svm
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import NearestNeighbors

# ``sklearn.cross_validation`` was removed from scikit-learn; the function now
# lives in ``sklearn.model_selection``. Fall back to the legacy module so the
# script still runs on very old installations.
try:
    from sklearn.model_selection import cross_val_score
except ImportError:  # legacy scikit-learn
    from sklearn.cross_validation import cross_val_score

from dataAnalysis import getTrainData

# The third return value is not used in this script. It was previously bound
# to the name ``dict``, which shadowed the builtin.
# NOTE(review): other scripts unpack only two values from getTrainData();
# confirm how many values it returns.
X, y, _extra = getTrainData()
print("input loaded!")

# NearestNeighbors is deliberately not in this list: it is an unsupervised
# neighbour search without predict/score, so it cannot be cross-validated as
# a classifier.
clfs = [
    svm.SVC(),
    neighbors.KNeighborsClassifier(15, weights='distance'),
    RandomForestClassifier(n_estimators=10),
]

for clf in clfs:
    print(clf)
    # cv=3 pins the fold count that the legacy module used by default, so the
    # numbers stay comparable across scikit-learn versions. Scoring is left at
    # the estimator default.
    scores = cross_val_score(clf, X, y, cv=3)
    print(scores.mean())
# Compare a single decision tree, a random forest and an extra-trees ensemble
# on a subset of feature columns, printing the mean cross-validation score of
# each model.
import sys

from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.tree import DecisionTreeClassifier

# cross_val_score is called below but was not imported by this script.
# Prefer the current location and fall back to the legacy module.
try:
    from sklearn.model_selection import cross_val_score
except ImportError:  # legacy scikit-learn
    from sklearn.cross_validation import cross_val_score

from dataAnalysis import getTrainData

# Column indices of the feature matrix (a permutation of 0..125).
# Presumably ordered from most to least important feature -- TODO confirm.
importance = [
    10, 9, 3, 7, 40, 8, 11, 16, 1, 38, 37, 19, 35, 33, 12, 59, 34, 51,
    92, 28, 36, 32, 77, 22, 45, 65, 5, 17, 14, 70, 30, 27, 20, 80, 52, 72,
    24, 61, 62, 75, 29, 39, 57, 2, 13, 44, 49, 64, 15, 102, 114, 60, 66, 31,
    54, 26, 48, 73, 100, 69, 88, 117, 25, 0, 87, 119, 78, 99, 42, 43, 76, 55,
    23, 125, 53, 6, 50, 110, 56, 21, 124, 109, 58, 41, 111, 101, 107, 81, 98, 105,
    84, 103, 18, 63, 93, 122, 67, 108, 83, 113, 106, 116, 4, 115, 104, 82, 120, 89,
    118, 94, 79, 96, 86, 68, 47, 97, 95, 123, 46, 121, 85, 91, 112, 90, 74, 71,
]

X, y = getTrainData(size=1000)
print("Data loaded!")

# Keep 39 columns: entries 1..39 of ``importance``.
# NOTE(review): the slice starts at 1, so importance[0] (column 10) is
# skipped -- confirm this is intentional and not an off-by-one.
X = X[:, importance[1:40]]

# min_samples_split must be >= 2 in current scikit-learn. Old releases clamped
# a value of 1 up to 2 internally, so 2 reproduces the original behaviour.
# cv=3 pins the fold count the legacy cross_validation module used by default.
clf = DecisionTreeClassifier(max_depth=None, min_samples_split=2,
                             random_state=0)
scores = cross_val_score(clf, X, y, cv=3)
print(scores.mean())

clf = RandomForestClassifier(n_estimators=1000, max_depth=None,
                             min_samples_split=2, random_state=0)
scores = cross_val_score(clf, X, y, cv=3)
print(scores.mean())

clf = ExtraTreesClassifier(n_estimators=1000, max_depth=None,
                           min_samples_split=2, random_state=0)
scores = cross_val_score(clf, X, y, cv=3)
print(scores.mean())

# Stop here with a success status. sys.exit is used instead of the ``exit``
# helper, which is only injected by the ``site`` module.
sys.exit(0)
# Fit a random forest on a subset of feature columns and write the
# predictions to 'prudential.csv' as a submission file.
import pandas as pd  # used below for read_csv / DataFrame; was not imported

from sklearn.datasets import make_blobs
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.tree import DecisionTreeClassifier

# ``sklearn.cross_validation`` was removed from scikit-learn; import from the
# current location and fall back to the legacy module on old installations.
try:
    from sklearn.model_selection import cross_val_score
except ImportError:  # legacy scikit-learn
    from sklearn.cross_validation import cross_val_score

from dataAnalysis import getTrainData

# Column indices of the feature matrix (a permutation of 0..125).
# Presumably ordered from most to least important feature -- TODO confirm.
importance = [
    10, 9, 3, 7, 40, 8, 11, 16, 1, 38, 37, 19, 35, 33, 12, 59, 34, 51,
    92, 28, 36, 32, 77, 22, 45, 65, 5, 17, 14, 70, 30, 27, 20, 80, 52, 72,
    24, 61, 62, 75, 29, 39, 57, 2, 13, 44, 49, 64, 15, 102, 114, 60, 66, 31,
    54, 26, 48, 73, 100, 69, 88, 117, 25, 0, 87, 119, 78, 99, 42, 43, 76, 55,
    23, 125, 53, 6, 50, 110, 56, 21, 124, 109, 58, 41, 111, 101, 107, 81, 98, 105,
    84, 103, 18, 63, 93, 122, 67, 108, 83, 113, 106, 116, 4, 115, 104, 82, 120, 89,
    118, 94, 79, 96, 86, 68, 47, 97, 95, 123, 46, 121, 85, 91, 112, 90, 74, 71,
]

test_df = pd.read_csv('./input/test.csv')
X, y = getTrainData()
print("Data loaded!")

# Keep 29 columns: entries 1..29 of ``importance``.
# NOTE(review): the slice starts at 1, so importance[0] (column 10) is
# skipped -- confirm this is intentional and not an off-by-one.
X = X[:, importance[1:30]]

# min_samples_split must be >= 2 in current scikit-learn. Old releases clamped
# a value of 1 up to 2 internally, so 2 reproduces the original behaviour.
clf = RandomForestClassifier(n_estimators=1000, max_depth=None,
                             min_samples_split=2, random_state=0).fit(X, y)

# NOTE(review): predictions are computed on the TRAINING matrix X, yet they
# are written next to the ids of the test set. A valid submission needs the
# test rows run through the same feature pipeline as getTrainData(), which is
# not visible from this script -- fix before submitting.
ytest = clf.predict(X)

# Fail with an actionable message instead of pandas' generic length error when
# the number of predictions does not match the number of test ids.
if len(ytest) != len(test_df):
    raise ValueError(
        "number of predictions (%d) does not match number of test ids (%d); "
        "predictions must be computed on the test features"
        % (len(ytest), len(test_df))
    )

submission = pd.DataFrame({
    "Id": test_df["Id"],
    "Response": ytest,
})
submission.to_csv('prudential.csv', index=False)