#Support Vector Machines
import sys, os #is necessary for relative import
sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'components')) #is necessary for relative import

from reading_train_data import txs, tys
from reading_test_data import tds, tis
from write_results import write_predictions
from sklearn import metrics
from sklearn.svm import SVC

# fit an SVM model to the data; probability=True is required for predict_proba
model = SVC(probability=True)
model.fit(txs, tys[:, 1])
predicted = model.predict_proba(tds)
write_predictions("svm.csv", tis[:, 0], predicted)
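# The scripts in this section all rely on a shared write_predictions helper from
# components/write_results.py, which is not shown here. Below is a minimal sketch of
# what that helper might look like, assuming it writes one id column plus the
# positive-class probability per row; the column names and CSV layout are assumptions,
# not the project's confirmed format.
import numpy as np

def write_predictions(filename, ids, predicted):
    # predict_proba returns one column per class; keep the positive-class column
    probs = np.asarray(predicted)
    if probs.ndim == 2:
        probs = probs[:, 1]
    rows = np.column_stack([np.asarray(ids).astype(str), probs.astype(str)])
    header = np.array([["id", "probability"]])
    np.savetxt(filename, np.append(header, rows, axis=0), fmt='%s', delimiter=',')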
# XGBoost: feature-importance dump and test-set predictions
# (excerpt: gbm, train_x, test_x, dtest, args_x and the configuration flags
#  are defined earlier in the script)
#watchlist = [ (xgmat,'train') ]
##bst = xgb.train( plst, xgmat, num_round, watchlist );
#matrix = xgb.DMatrix(train_x, labels, weight=weights)

#100 features maximum :(
if get_feature_importance:
    bst = gbm.booster()  # newer xgboost versions expose this as gbm.get_booster()
    prev_fn = bst.feature_names
    bst.feature_names = [i for i in train_x.columns.values]  #args_x[1:]
    imps = bst.get_fscore()
    #print(len(imps))
    if configure_verbose_mode:
        print(imps)
    # features the booster never split on have no fscore entry, hence the "NaN" fallback
    imps = np.array([[str(imps[i]) if i in imps else "NaN" for i in bst.feature_names]])
    imps = np.append([train_x.columns.values], imps, axis=0)
    np.savetxt(feature_importance_file, imps, fmt='%s', delimiter=',')
    # restore the original state
    bst.feature_names = prev_fn

predictions = gbm.predict_proba(test_x)
arr = dtest[args_x[0]].values  # .as_matrix() was removed in recent pandas
write_predictions("1boost.csv", arr, predictions)

#import matplotlib.pyplot as plt
#xgb.plot_importance(gbm)
#plt.show() #bad plot :(
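# The excerpt above assumes an already-fitted sklearn-style XGBoost model (gbm), pandas
# frames train_x / test_x / dtest, and a few configuration names. A minimal sketch of
# that setup is given below; the file names, column names ("id", "label") and
# hyperparameters are illustrative assumptions, not the values used in the original script.
import pandas as pd
import xgboost as xgb

get_feature_importance = True
configure_verbose_mode = False
feature_importance_file = "feature_importance.csv"

dtrain = pd.read_csv("train.csv")   # hypothetical input files
dtest = pd.read_csv("test.csv")

args_x = ["id"] + [c for c in dtrain.columns if c not in ("id", "label")]  # id column first, then features
train_x = dtrain[args_x[1:]]
labels = dtrain["label"]
test_x = dtest[args_x[1:]]

gbm = xgb.XGBClassifier(n_estimators=300, max_depth=6, learning_rate=0.05)
gbm.fit(train_x, labels)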
import sys, os #is necessary for relative import
sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'components')) #is necessary for relative import

from reading_train_data import train_data_x, train_data_y
from reading_test_data import test_data
from write_results import write_predictions
from sklearn import metrics
from sklearn.naive_bayes import GaussianNB

model = GaussianNB()
model.fit(train_data_x[:, 1:], train_data_y[:, 1])
predicted = model.predict_proba(test_data[:, 1:])
write_predictions("nb_submission.csv", test_data[:, 0], predicted)
import sys, os #is necessary for relative import
sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'components')) #is necessary for relative import

from reading_train_data import txs, tys
from reading_test_data import tds, tis
from write_results import write_predictions
from sklearn import metrics
from sklearn.neighbors import KNeighborsClassifier

# fit a k-nearest neighbor model to the data (5 neighbours, distance-weighted votes)
model = KNeighborsClassifier(n_neighbors=5, weights='distance')  # keyword args: recent sklearn rejects extra positional arguments
model.fit(txs, tys[:, 1])
predicted = model.predict_proba(tds)
write_predictions("k_nearest.csv", tis, predicted)
#Classification and Regression Trees
import sys, os #is necessary for relative import
sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'components')) #is necessary for relative import

from reading_train_data import txs, tys
from reading_test_data import tds, tis
from write_results import write_predictions
from sklearn import metrics
from sklearn.tree import DecisionTreeClassifier

# fit a CART model to the data
model = DecisionTreeClassifier()
model.fit(txs, tys[:, 1])
predicted = model.predict_proba(tds)
write_predictions("cart.csv", tis, predicted)
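# Every script above imports sklearn.metrics without using it, so a local evaluation
# step was presumably intended. A minimal sketch of what that could look like for the
# CART model, using a hypothetical 80/20 hold-out split (the split and the log-loss
# metric are assumptions, not part of the original pipeline):
from sklearn.metrics import log_loss
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

x_tr, x_val, y_tr, y_val = train_test_split(txs, tys[:, 1], test_size=0.2, random_state=0)
clf = DecisionTreeClassifier()
clf.fit(x_tr, y_tr)
print("hold-out log loss:", log_loss(y_val, clf.predict_proba(x_val)))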
# Logistic regression on PCA-reduced features
import sys, os #is necessary for relative import
sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'components')) #is necessary for relative import

import numpy as np
from reading_train_data import txs, tys
from reading_test_data import tds, tis
from write_results import write_predictions
from sklearn import metrics
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression

num_components = 25  # matches the output filename below

# project training and test features onto the first num_components principal components
pca = PCA(num_components)
x_transformed = pca.fit_transform(txs)
ts_transformed = pca.transform(tds)

model = LogisticRegression()
model.fit(x_transformed, tys[:, 1])
predicted = model.predict_proba(ts_transformed)
write_predictions("regression_pca25.csv", tis, predicted)

# the block below (commented out) dumps the PCA-transformed data to CSV for reuse
"""
writing_parts = [list(map(str, rw)) for rw in x_transformed]
labels = ["feature" + str(i) for i in range(1, num_components + 1)]
writing_parts = np.append([labels], writing_parts, axis=0)
np.savetxt("../../data/pca/training_pca" + str(num_components) + ".csv", writing_parts, fmt='%s', delimiter=',')

writing_parts = [list(map(str, rw)) for rw in ts_transformed]
labels = ["feature" + str(i) for i in range(1, num_components + 1)]
writing_parts = np.append([labels], writing_parts, axis=0)
np.savetxt("../../data/pca/test_pca" + str(num_components) + ".csv", writing_parts, fmt='%s', delimiter=',', newline='\n')
"""
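# A quick way to sanity-check the choice of 25 components is to look at how much of the
# total variance the projection keeps; this is only an inspection aid and reuses the pca
# object fitted in the script above:
import numpy as np
print("variance retained by", num_components, "components:",
      np.sum(pca.explained_variance_ratio_))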