if ver_score[x-1][-1] > score: score = ver_score[x-1][-1] max_x = x max_y = y print "X: ", x, ". Y: ", y, ":" print "Training Score: ", train_score[x-1][-1], ". Verification Score: ", \ ver_score[x-1][-1] #Example params given. Will be decided by optimizing. def gen_mlp(): clf = MLPClassifier(alpha=1e-5, hidden_layer_sizes=(7,3), random_state=1, shuffle=True, warm_start=1, verbose=1) clf.fit(X_train_2008, Y_train_2008) return clf # Save Model mlp = read_make_pkl("saved_objs/mlp.pkl", gen_mlp) def main(argv): try: opts, args = getopt.getopt(argv,"ho:",["output="]) except getopt.GetoptError: print 'mlpclassifier.py [-o [2008] [2012] [tune]]' sys.exit(2) for opt, arg in opts: if opt == '-h': print 'mlpclassifer.py [-o [2008] [2012] [tune]]' sys.exit() elif opt in ("-o", "--output"): if (arg == "2008"): # DEBUG print("mlp.predict(X_test_2008).shape" +
neighbor = n print "Number of neighbors: ", neighbor, print "Training Score: ", train_score[-1], ". Verification Score: ", \ ver_score[-1] return neighbor #Example params given. Will be decided by optimizing. def gen_knearest(): clf = KNeighborsClassifier(n_neighbors=10) clf.fit(X_train_2008, Y_train_2008) return clf # Save Model knearest = read_make_pkl("saved_objs/knearest.pkl", gen_knearest) def main(argv): try: opts, args = getopt.getopt(argv, "ho:", ["output="]) except getopt.GetoptError: print 'knearest_neighbor.py [-o [2008] [2012] [tune]]' sys.exit(2) for opt, arg in opts: if opt == '-h': print 'knearest_neighbor.py [-o [2008] [2012] [tune]]' sys.exit() elif opt in ("-o", "--output"): if (arg == "2008"): # DEBUG
import os.path import numpy as np import pkl_help from pkl_help import read_make_pkl from sklearn import preprocessing import preprocess_help as ph def get_csv_data(filename): data = np.genfromtxt(filename, delimiter=",") return data train_2008 = read_make_pkl("saved_objs/train_2008.pkl", lambda: get_csv_data("data/train_2008.csv"), compress=True) test_2008 = read_make_pkl("saved_objs/test_2008.pkl", lambda: get_csv_data("data/test_2008.csv"), compress=True) test_2012 = read_make_pkl("saved_objs/test_2012.pkl", lambda: get_csv_data("data/test_2012.csv"), compress=True) ################################################################################## X_train_2008 = read_make_pkl("saved_objs/X_train_2008.pkl", lambda: ph.remove_header_and_normalize_train(train_2008), compress=True) Y_train_2008 = read_make_pkl("saved_objs/Y_train_2008.pkl", lambda: ph.grab_train_Y(train_2008), compress=True) X_ver_2008 = read_make_pkl("saved_objs/X_ver_2008.pkl",
#bst = xgb.train(plst, dtrain, evals=evallist) # Generate xgb model def gen_xgb(): evals = [(X_ver_2008, Y_ver_2008)] model = xgb.XGBRegressor() model.fit(X_train_2008, Y_train_2008, eval_set=evals, early_stopping_rounds=10, verbose=True) return model xgb_model = read_make_pkl("saved_objs/xgb.pkl", gen_xgb) def main(argv): try: opts, args = getopt.getopt(argv, "ho:", ["output="]) except getopt.GetoptError: print 'xgb.py [-o [2008] [2012]]' sys.exit(2) for opt, arg in opts: if opt == '-h': print 'xgb.py [-o [2008] [2012]]' sys.exit() elif opt in ("-o", "--output"): if (arg == "2008"): preds = round_predictions(xgb_model.predict(X_test_2008))
# Get training data X_train_2008 = get_pkl("saved_objs/X_train_2008.pkl") Y_train_2008 = get_pkl("saved_objs/Y_train_2008.pkl") X_test_2008 = get_pkl("saved_objs/X_test_2008.pkl") X_test_2012 = get_pkl("saved_objs/X_test_2012.pkl") # function to generate lasso model def gen_lasso(): model = LassoCV(cv=10, n_jobs=-1, verbose=True) model.fit(X_train_2008, Y_train_2008) return model # Save model lasso = read_make_pkl("saved_objs/lasso.pkl", gen_lasso) def main(argv): try: opts, args = getopt.getopt(argv, "ho:", ["output="]) except getopt.GetoptError: print 'lasso.py [-o [2008] [2012]]' sys.exit(2) for opt, arg in opts: if opt == '-h': print 'lasso.py [-o [2008] [2012]]' sys.exit() elif opt in ("-o", "--output"): if (arg == "2008"): preds = round_predictions(lasso.predict(X_test_2008))
# Generate xgb model def gen_xgb(): evals = [(X_ver_2008, Y_ver_2008)] model = xgb.XGBRegressor(n_estimators=100000) #cvresult = xgb.cv(model.get_params(), dtrain, nfold=10, # early_stopping_rounds=20) #print ("CVRESULT:\n" + str(cvresult)) model.fit(X_train_2008, Y_train_2008, eval_set=evals, early_stopping_rounds=20, verbose=True) return model xgb_model = read_make_pkl("saved_objs/xgb_100000_estimators.pkl", gen_xgb) def xgb_preds_2008(): return xgb_model.predict(X_test_2008) def xgb_preds_2012(): return xgb_model.predict(X_test_2012) read_make_pkl("saved_objs/xgb_100k_estimators_preds_2008.pkl", xgb_preds_2008) read_make_pkl("saved_objs/xgb_100k_estimators_preds_2012.pkl", xgb_preds_2012) def main(argv):
#bst = xgb.train(plst, dtrain, evals=evallist) # Generate xgb model def gen_xgb(): evals = [(X_ver_2008, Y_ver_2008)] model = xgb.XGBRegressor(n_estimators=1000) model.fit(X_train_2008, Y_train_2008, eval_set=evals, early_stopping_rounds=20, verbose=True) return model xgb_model = read_make_pkl("saved_objs/xgb_1000_estimators.pkl", gen_xgb) def main(argv): try: opts, args = getopt.getopt(argv, "ho:", ["output="]) except getopt.GetoptError: print 'xgb_1000_estimators.py [-o [2008] [2012]]' sys.exit(2) for opt, arg in opts: if opt == '-h': print 'xgb_1000_estimators.py [-o [2008] [2012]]' sys.exit() elif opt in ("-o", "--output"): if (arg == "2008"): preds = round_predictions(xgb_model.predict(X_test_2008))
print "Estimators: ", estimators, ". Max Depth: ", depth, ":" print "Training Score: ", train_score[estimators-1][-1], ". Verification Score: ", \ ver_score[estimators-1][-1] return (max_depth, max_est) #Example params given. Will be decided by optimizing. def gen_adaboost(): clf = AdaBoostClassifier(DecisionTreeClassifier(max_depth=1), n_estimators=100) clf.fit(X_train_2008, Y_train_2008) return clf # Save Model adaboost = read_make_pkl("saved_objs/adaboost.pkl", gen_adaboost) def main(argv): try: opts, args = getopt.getopt(argv, "ho:", ["output="]) except getopt.GetoptError: print 'adaboost.py [-o [2008] [2012] [tune]]' sys.exit(2) for opt, arg in opts: if opt == '-h': print 'adaboost.py [-o [2008] [2012] [tune]]' sys.exit() elif opt in ("-o", "--output"): if (arg == "2008"): # DEBUG
import os.path import numpy as np import pkl_help from pkl_help import read_make_pkl from sklearn import svm import preprocess_help as ph from sklearn.model_selection import train_test_split from sklearn.model_selection import GridSearchCV from sklearn.metrics import classification_report from sklearn.svm import SVC X_train_2008 = read_make_pkl( "saved_objs/X_train_2008.pkl", lambda: ph.remove_header_and_normalize_train(train_2008), compress=True) Y_train_2008 = read_make_pkl("saved_objs/Y_train_2008.pkl", lambda: ph.grab_train_Y(train_2008), compress=True) X_ver_2008 = read_make_pkl( "saved_objs/X_ver_2008.pkl", lambda: ph.remove_header_and_normalize_ver(train_2008), compress=True) Y_ver_2008 = read_make_pkl("saved_objs/Y_ver_2008.pkl", lambda: ph.grab_ver_Y(train_2008), compress=True) params = [{ 'kernel': ['rbf'], 'gamma': [1e-3, 1e-4], 'C': [1, 10, 100, 1000] }, {
print("d1: " + str(model.predict(X_test_2008).shape)) print("d2: " + str(model.coef_.shape)) # return model def lin_reg_modified_predict(model, X): preds = model.predict(X).reshape(-1, 1) mpreds = round_predictions(preds) # Debug print("mpreds.shape: " + str(mpreds.shape)) return mpreds # Save model lin_reg_model = read_make_pkl("saved_objs/lin_reg.pkl", gen_lin_reg_model) def main(argv): try: opts, args = getopt.getopt(argv, "ho:", ["output="]) except getopt.GetoptError: print 'linear_regression.py [-o [2008] [2012]]' sys.exit(2) for opt, arg in opts: if opt == '-h': print 'linear_regression.py [-o [2008] [2012]]' sys.exit() elif opt in ("-o", "--output"): if (arg == "2008"): # DEBUG
model = RandomForestClassifier(max_depth=15, n_estimators=30, max_features='auto') model.fit(X_train_2008, Y_train_2008) return model def rand_forest_modified_predict(model, Y): preds = model.predict(Y).reshape(-1, 1) # Debug print("preds.shape: " + str(preds.shape)) return preds # Save Model rand_forest_model = read_make_pkl("saved_objs/rand_forest.pkl", gen_rand_forest) def main(argv): try: opts, args = getopt.getopt(argv, "ho:", ["output="]) except getopt.GetoptError: print 'random_forest.py [-o [2008] [2012] [graphs]]' sys.exit(2) for opt, arg in opts: if opt == '-h': print 'random_forest.py [-o [2008] [2012] [graphs]]' sys.exit() elif opt in ("-o", "--output"): if (arg == "2008"): # DEBUG
# Get training data X_train_2008 = get_pkl("saved_objs/X_train_2008.pkl") Y_train_2008 = get_pkl("saved_objs/Y_train_2008.pkl") X_test_2008 = get_pkl("saved_objs/X_test_2008.pkl") X_test_2012 = get_pkl("saved_objs/X_test_2012.pkl") # function to generate ridge model def gen_ridge(): model = RidgeCV() model.fit(X_train_2008, Y_train_2008) return model # Save model ridge = read_make_pkl("saved_objs/ridge.pkl", gen_ridge) def main(argv): try: opts, args = getopt.getopt(argv, "ho:", ["output="]) except getopt.GetoptError: print 'ridge.py [-o [2008] [2012]]' sys.exit(2) for opt, arg in opts: if opt == '-h': print 'ridge.py [-o [2008] [2012]]' sys.exit() elif opt in ("-o", "--output"): if (arg == "2008"): preds = round_predictions(ridge.predict(X_test_2008))
def gen_voting(): print "Making Voting Classifier" clf = VotingClassifier(estimators=[('ridge', ridge), ('mlp', mlp), ('rand_forest', rand_forest), ('adaboost', adaboost)], voting='hard') print "Training Voting Classifier" clf.fit(X_train_2008, Y_train_2008) print "Scoring Voting Classifier" print "Training Score: ", clf.score(X_train_2008, Y_train_2008) print "Ver Score: ", clf.score(X_ver, Y_ver) return clf # Save Model voting = read_make_pkl("saved_objs/voting.pkl", gen_voting) def main(argv): try: opts, args = getopt.getopt(argv, "ho:", ["output="]) except getopt.GetoptError: print 'voting.py [-o [2008] [2012]]' sys.exit(2) for opt, arg in opts: if opt == '-h': print 'voting.py [-o [2008] [2012]]' sys.exit() elif opt in ("-o", "--output"): if (arg == "2008"): # DEBUG