def std_svm(path):
    """
    Train and evaluate the best SVM on five random train/test splits and
    print the list of resulting accuracies.

    The three average betting odds columns are removed before the data is
    converted to input/output lists.

    :param path: path to the preprocessed data. If the provided preprocessed
        data is used: Data/preprocessed_data.csv
    """
    odds_columns = ['avg_home_win_odds', 'avg_draw_odds', 'avg_away_win_odds']
    frame = GetData.load_preprocessed_data(path)
    frame = frame.drop(odds_columns, axis=1)
    features, labels = GetData.convert_df_to_lists(frame, False)

    accuracies = []
    for _ in range(5):
        # The seed is drawn from 1..10 inclusive, so two runs may end up
        # using the same split.
        seed = random.randint(1, 10)
        x_train, x_test, y_train, y_test = train_test_split(
            features, labels, test_size=0.25, random_state=seed)

        print('SVM running...')
        accuracies.append(
            SVM.support_vector_machine(x_train, x_test, y_train, y_test,
                                       10**7, 10**-4))

    print('svm accuracies:', accuracies)
def run_best_ann_and_svm(data):
    """
    Train the best SVM and the best neural network (the shallow neural
    network with the relu activation function) on one shared random
    train/test split and present both accuracies.

    The SVM is trained on the raw 0/1/2 labels; the labels are converted to
    the neural network format only afterwards.

    :param data: dataframe containing the preprocessed data
    """
    seed = random.randint(1, 10)
    features, labels = GetData.convert_df_to_lists(data, False)
    x_train, x_test, y_train, y_test = train_test_split(
        features, labels, test_size=0.25, random_state=seed)

    print('SVM running...')
    svm_accuracy = SVM.support_vector_machine(
        x_train, x_test, y_train, y_test, 10**7, 10**-4)

    # Re-encode the targets for the neural network once the SVM is done.
    nn_y_train = GetData.convert_output_vector_to_nn_format(y_train)
    nn_y_test = GetData.convert_output_vector_to_nn_format(y_test)

    print('Neural Network running...')
    print('The confusion matrix and probability differences will be printed after the NN has trained')
    nn_accuracy = ShallowNeuralNetworks.shallow_neural_network_relu(
        x_train, x_test, nn_y_train, nn_y_test, 10)

    ModelAccuracyPlot.present_results(
        ['SVM', 'Shallow NN With relu'],
        [svm_accuracy, nn_accuracy])
def run_best_ann_and_svm_from_raw_data():
    """
    Run the best neural network and SVM starting from the raw data.

    The data is preprocessed on the fly, turned into a dataframe that
    includes the odds columns, stripped of the three average odds columns
    and then handed to run_best_ann_and_svm.
    """
    preprocessed = GetData.preprocessing()
    frame = GetData.create_df_from_two_lists(preprocessed, odds=True)
    odds_columns = ['avg_home_win_odds', 'avg_draw_odds', 'avg_away_win_odds']
    run_best_ann_and_svm(frame.drop(odds_columns, axis=1))
def main(
        path='/Users/vegardhaneberg/PycharmProjects/SoccerGroupProject/Data/preprocessed_data.csv'
):
    """
    Train the two hidden layer tanh network once on a fixed train/test split.

    The preprocessed data is loaded, the three average betting odds columns
    are dropped, the data is split 75/25 with random_state=1 and the labels
    are converted to the neural network format before training.

    :param path: path to the preprocessed data csv. The default is the
        location that used to be hard-coded here; pass another path to run
        on a different machine.
    """
    data = GetData.load_preprocessed_data(path)
    data = data.drop(
        ['avg_home_win_odds', 'avg_draw_odds', 'avg_away_win_odds'], axis=1)

    input_vector, output_vector = GetData.convert_df_to_lists(data, False)
    x_train, x_test, y_train, y_test = train_test_split(
        input_vector, output_vector, test_size=0.25, random_state=1)

    y_train = GetData.convert_output_vector_to_nn_format(y_train)
    y_test = GetData.convert_output_vector_to_nn_format(y_test)

    # NOTE(review): this name is used unqualified here, while other functions
    # in this file call TwoLayerNeuralNetwork.two_layer_neural_network_tanh;
    # confirm that it is imported directly.
    two_layer_neural_network_tanh(x_train, x_test, y_train, y_test, 10)
def run_all_models_from_preprocessed_data():
    """
    Load the preprocessed data (no explicit path is passed to the loader),
    drop the three average betting odds columns and run the deep models
    with a random split seed between 1 and 10.

    Only the deep models are currently active; the shallow and two layer
    runs are commented out below.
    """
    odds_columns = ['avg_home_win_odds', 'avg_draw_odds', 'avg_away_win_odds']
    frame = GetData.load_preprocessed_data().drop(odds_columns, axis=1)
    features, labels = GetData.convert_df_to_lists(frame, False)
    seed = random.randint(1, 10)

    #run_all_shallow(features, labels, seed)
    #run_all_two_layer(features, labels, seed)
    run_all_deep(features, labels, seed)
def pair_plot_ratings():
    """
    function that plots the two teams player rating against the match result

    The numeric match results 0, 1 and 2 are relabelled as 'Home Win',
    'Draw' and 'Away Win'. Rows where either average rating equals 0 are
    left out, and at most 5000 randomly sampled rows are plotted.
    """
    result_labels = {0: 'Home Win', 1: 'Draw', 2: 'Away Win'}

    data = GetData.load_preprocessed_data()
    # One pass over the column; values other than 0/1/2 are kept unchanged.
    data['match_result'] = data['match_result'].apply(
        lambda result: result_labels.get(result, result))

    data = data[(data['avg_home_rating'] != 0)
                & (data['avg_away_rating'] != 0)]
    plot_data = data[['avg_home_rating', 'avg_away_rating', 'match_result']]

    # DataFrame.sample raises ValueError when asked for more rows than are
    # available (sampling without replacement), so cap the sample size.
    sample_size = min(5000, len(plot_data))

    sns.pairplot(
        data=plot_data.sample(sample_size),
        hue="match_result",
        palette={
            'Home Win': 'peachpuff',
            'Draw': 'thistle',
            'Away Win': 'rebeccapurple'
        })
    plt.show()
def run_best_ann_and_svm_from_preprocessed_data():
    """
    Run the best neural network and SVM on the preprocessed data.

    The data is loaded without an explicit path, the three average betting
    odds columns are dropped and the result is handed to
    run_best_ann_and_svm.
    """
    odds_columns = ['avg_home_win_odds', 'avg_draw_odds', 'avg_away_win_odds']
    frame = GetData.load_preprocessed_data()
    run_best_ann_and_svm(frame.drop(odds_columns, axis=1))
def std_best_models(path):
    """
    function that runs three neural networks (shallow relu, deep relu and
    deep tanh) five times each and prints their accuracies

    Every round draws a new random split seed between 1 and 10, so all three
    models of one round share the same train/test split.

    :param path: path to the preprocessed data. If the provided preprocessed
        data is used: Data/preprocessed_data.csv
    """
    data = GetData.load_preprocessed_data(path)
    data = data.drop(
        ['avg_home_win_odds', 'avg_draw_odds', 'avg_away_win_odds'], axis=1)
    input_vector, output_vector = GetData.convert_df_to_lists(data, False)

    # Named after the model that actually produces the values: the first
    # list holds the shallow relu accuracies (it used to be called
    # shallow_tanh_accs and was printed under the label 'Two layer:').
    shallow_relu_accs = []
    deep_relu_accs = []
    deep_tanh_accs = []

    for _ in range(5):
        random_state = random.randint(1, 10)
        x_train, x_test, y_train, y_test = train_test_split(
            input_vector,
            output_vector,
            test_size=0.25,
            random_state=random_state)
        y_train = GetData.convert_output_vector_to_nn_format(y_train)
        y_test = GetData.convert_output_vector_to_nn_format(y_test)

        shallow_relu_accs.append(
            ShallowNeuralNetworks.shallow_neural_network_relu(
                x_train, x_test, y_train, y_test, 10))
        deep_relu_accs.append(
            DeepNeuralNetworks.deep_neural_network_relu(
                x_train, x_test, y_train, y_test, 10))
        deep_tanh_accs.append(
            DeepNeuralNetworks.deep_neural_network_tanh(
                x_train, x_test, y_train, y_test, 10))

    print('Shallow relu:', shallow_relu_accs)
    print('Deep relu:', deep_relu_accs)
    print('Deep tanh:', deep_tanh_accs)
def print_draw_predictions_betting_companies():
    """
    Compute the share of matches for which the betting companies' draw value
    is strictly larger than both the home win and the away win value.

    NOTE(review): despite the name, nothing is printed; the share is
    returned. The check treats the largest of the three values as the
    predicted outcome - confirm that the preprocessed odds columns are
    scaled so that larger means more likely.

    :return: the fraction (between 0 and 1) of rows counted as predicted
        draws
    """
    data = GetData.load_preprocessed_data()
    draw_predictions = sum(
        1 for _, match in data.iterrows()
        if match['avg_draw_odds'] > match['avg_home_win_odds']
        and match['avg_draw_odds'] > match['avg_away_win_odds'])
    return draw_predictions / len(data)
def raw_player_statistics():
    """
    Load all players and print summary statistics: null count per column,
    number of columns, number of rows containing at least one missing
    value, the describe() table and the column dtypes.
    """
    players = GetData.get_all_players()

    nulls_per_column = players.isnull().sum()
    print(nulls_per_column)

    print('Columns:', len(players.columns))

    complete_rows = players.dropna()
    print('rows with nans:', len(players) - len(complete_rows))

    print(players.describe())
    print(players.dtypes)
def win_statistics():
    """
    Read the preprocessed matches and compute how often each result occurs.

    The 'match_result' value of every row is used as an index (0, 1 or 2)
    into the tally.

    :return: a tuple with the fractions of results 0 (home win), 1 (draw)
        and 2 (away win), in that order
    """
    data = GetData.load_preprocessed_data()

    tally = [0, 0, 0]
    for outcome in data['match_result']:
        tally[int(outcome)] += 1

    total = len(data)
    home_share = tally[0] / total
    draw_share = tally[1] / total
    away_share = tally[2] / total
    return home_share, draw_share, away_share
def raw_match_statistics():
    """
    Load all matches and print summary statistics: null count per column,
    number of columns, number of rows containing at least one missing
    value, the describe() table and the column dtypes.

    The per-column null counts are also written to a csv file.
    NOTE(review): the output location is an absolute, user-specific path.
    """
    matches = GetData.get_all_matches()

    nulls_per_column = matches.isnull().sum()
    print(nulls_per_column)

    print('Columns:', len(matches.columns))

    complete_rows = matches.dropna()
    print('rows with nans:', len(matches) - len(complete_rows))

    nulls_per_column.to_csv(
        '/Users/vegardhaneberg/Desktop/isnullmatches.csv', index=False)

    print(matches.describe())
    print(matches.dtypes)
def _two_layer_accuracies(inputs, outputs, random_state, input_size):
    """Split the data, train the relu, tanh and sigmoid two layer networks
    in that order and return their three accuracies as a list."""
    x_train, x_test, y_train, y_test = train_test_split(
        inputs, outputs, test_size=0.25, random_state=random_state)
    y_train = GetData.convert_output_vector_to_nn_format(y_train)
    y_test = GetData.convert_output_vector_to_nn_format(y_test)

    return [
        TwoLayerNeuralNetwork.two_layer_neural_network_relu(
            x_train, x_test, y_train, y_test, input_size),
        TwoLayerNeuralNetwork.two_layer_neural_network_tanh(
            x_train, x_test, y_train, y_test, input_size),
        TwoLayerNeuralNetwork.two_layer_neural_network_sigmoid(
            x_train, x_test, y_train, y_test, input_size),
    ]


def run_all_two_layer(input_vector, output_vector, random_state):
    """
    function that runs all neural networks with two hidden layers, both with
    and without pca and prints the results

    The networks are first trained on the full input, then on a PCA reduced
    version of the same data (PCA.pca is called with 2, and the networks are
    then called with 2 instead of 10 as their last argument).

    Both train/test splits now use random_state. The PCA split used to be
    pinned to random_state=1, so the PCA and non-PCA models were compared on
    different splits and the parameter was partly ignored.

    :param input_vector: a list with lists that contains the input to the
        models
    :param output_vector: a list that contains the match results on the
        format 0, 1 or 2
    :param random_state: the state for the train test split
    """
    accuracies = _two_layer_accuracies(input_vector, output_vector,
                                       random_state, 10)

    df = GetData.create_df_from_two_lists([input_vector, output_vector])
    pca_df = PCA.pca(df, 'match_result', 2)
    pca_inputs, pca_outputs = GetData.convert_df_to_lists(pca_df, False)
    accuracies += _two_layer_accuracies(pca_inputs, pca_outputs,
                                        random_state, 2)

    model_names = [
        'Two Hidden Layer NN With relu',
        'Two Hidden Layer NN With tanh',
        'Two Hidden Layer NN With Sigmoid',
        'Two Hidden Layer NN With relu and PCA',
        'Two Hidden Layer NN With tanh and PCA',
        'Two Hidden Layer NN With Sigmoid and PCA',
    ]
    ModelAccuracyPlot.present_results(model_names, accuracies)
def betting_accuracy():
    """
    Read all matches from the preprocessed data and measure how often the
    betting companies' pick equals the actual match result.

    For every row the position (0 = home win, 1 = draw, 2 = away win) of the
    largest of the three average odds values is compared with
    'match_result'.
    NOTE(review): taking the largest value as the pick assumes that larger
    means more likely in the preprocessed odds columns - confirm.

    :return: the accuracy of the betting companies as a fraction between 0
        and 1
    """
    data = GetData.load_preprocessed_data()

    hits = 0
    for _, match in data.iterrows():
        odds = [
            match['avg_home_win_odds'],
            match['avg_draw_odds'],
            match['avg_away_win_odds'],
        ]
        predicted = odds.index(max(odds))
        if predicted == match['match_result']:
            hits += 1

    return hits / len(data)
def player_statistics():
    """
    Load all players, write their describe() table to
    ../Data/Statistics/player_stats.csv and print the same table.
    """
    players = GetData.get_all_players()
    summary = players.describe()
    summary.to_csv('../Data/Statistics/player_stats.csv', index=True)
    print(summary)
def match_statistics():
    """
    Load all matches, write their describe() table to
    ../Data/Statistics/stats.csv and print the same table.
    """
    matches = GetData.get_all_matches()
    summary = matches.describe()
    summary.to_csv('../Data/Statistics/stats.csv', index=True)
    print(summary)