示例#1
0
def train():
    print("Building Data...")
    trainingX, trainingY, team_stats = data.get_data()

    tourney_teams, team_id_map = data.get_tourney_teams(2017)
    tourney_teams.sort()

    testingXtemp = []

    matchups = []

    for team_1 in tourney_teams:
        for team_2 in tourney_teams:
            if team_1 < team_2:
                game_features = data.get_game_features(team_1, team_2, 0, 2017,
                                                       team_stats)
                testingXtemp.append(game_features)

                game = [team_1, team_2]
                matchups.append(game)

    testingX = np.array(testingXtemp)

    print("Fitting model...")
    model = MLPClassifier(hidden_layer_sizes=(30, 30))
    model.fit(trainingX, np.ravel(trainingY))

    return model, testingX, matchups
示例#2
0
def train():
    print "Building Data..."
    trainingX, trainingY, team_stats = data.get_data()

    tourney_teams, team_id_map = data.get_tourney_teams(2017)
    tourney_teams.sort()

    testingXtemp = []

    matchups = []

    for team_1 in tourney_teams:
        for team_2 in tourney_teams:
            if team_1 < team_2:
                game_features = data.get_game_features(team_1, team_2, 0, 2017,
                                                       team_stats)
                testingXtemp.append(game_features)

                game = [team_1, team_2]
                matchups.append(game)

    testingX = np.array(testingXtemp)

    print "Fitting model..."
    # Test the random forest algorithm
    seed(1)
    # evaluate algorithm
    max_depth = 10
    min_size = 1
    sample_size = 1.0
    n_features = 28
    for n_trees in [1, 5, 10]:
        predictions = evaluate_algorithm(trainingX, trainingY, random_forest,
                                         max_depth, min_size, sample_size,
                                         n_trees, n_features)
        print predictions
        # print('Trees: %d' % n_trees)
        # print('Scores: %s' % scores)
        # print('Mean Accuracy: %.3f%%' % (sum(scores)/float(len(scores))))

    return model, testingX, matchups
def build_bracket():
    teams_df = pd.read_csv("../data2017/FirstRound.csv", header=0)
    results_df = pd.read_csv("../data2017/TourneyMatchups2017.csv", header=0)
    tourney_teams, team_id_map = data.get_tourney_teams(2017)

    adaboost_data = pd.read_csv("../predictions/AdaBoost_Predictions_2017.csv",
                                header=0)
    knn_data = pd.read_csv("../predictions/KNN_Predictions_2017.csv", header=0)
    naive_bayes_data = pd.read_csv(
        "../predictions/NaiveBayes_Predictions_2017.csv", header=0)
    neural_net_data = pd.read_csv(
        "../predictions/NeuralNet_Predictions_2017.csv", header=0)
    random_forest_data = pd.read_csv(
        "../predictions/RandomForest_Predictions_2017.csv", header=0)
    regression_data = pd.read_csv(
        "../predictions/Regression_Predictions_2017.csv", header=0)
    svm_data = pd.read_csv("../predictions/SVM_Predictions_2017.csv", header=0)

    test_list = [
        adaboost_data, knn_data, naive_bayes_data, neural_net_data,
        random_forest_data, regression_data, svm_data
    ]

    for df in test_list:
        score = 0
        teams = []
        for index, row in teams_df.iterrows():
            teams.append(row["Team1"])
            teams.append(row["Team2"])

        my_index = 0
        tempTeams = []
        round_val = 10
        for i in range(6):  # 6 rounds of tourney
            print
            print("ROUND " + str(i + 1))
            while my_index < len(teams):
                #print my_index
                team1 = teams[my_index]
                team2 = teams[my_index + 1]
                #print "t1: ", team1, " t2: ", team2

                for index, row in df.iterrows():
                    #print my_index
                    if (row["Team1"] == team1 and row["Team2"] == team2) or (
                            row["Team1"] == team2 and row["Team2"] == team1):
                        if row["Prediction"] == 0:
                            tempTeams.append(row["Team1"])
                            print(
                                str(team_id_map[row["Team1"]]) + " over " +
                                str(team_id_map[row["Team2"]]))
                            print("Round Val: " + str(round_val))
                            winner = 0
                        else:
                            tempTeams.append(row["Team2"])
                            print(
                                str(team_id_map[row["Team2"]]) + " over " +
                                str(team_id_map[row["Team1"]]))
                            print("Round Val: " + str(round_val))
                            winner = 1

                        for index, new_row in results_df.iterrows():
                            if (new_row["Team1"] == team1 and new_row["Team2"]
                                    == team2) or (new_row["Team1"] == team2 and
                                                  new_row["Team2"] == team1):
                                if winner == new_row["Prediction"]:
                                    score = score + round_val
                                    print("Score: " + str(score))

                        my_index = my_index + 2

            round_val = round_val * 2
            teams = tempTeams
            tempTeams = []
            my_index = 0
        print("Score: " + str(score))
import data
import numpy as np
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier

if __name__ == "__main__":
    trainingX, trainingY, team_stats = data.get_data()

    print("Generated training set!")

    tourney_teams, team_id_map = data.get_tourney_teams(2017)
    tourney_teams.sort()

    print("Got tourney teams!")

    testingXtemp = []
    testingYtemp = []

    matchups = []

    for team_1 in tourney_teams:
        for team_2 in tourney_teams:
            if team_1 < team_2:
                game_features = data.get_game_features(team_1, team_2, 0, 2017,
                                                       team_stats)
                testingXtemp.append(game_features)

                game = [team_1, team_2]
                matchups.append(game)

    testingX = np.array(testingXtemp)