def test_case_optimize_parameters():
    """Smoke-test ``DixonColes.optimize_parameters`` on French matches.

    Matches are restricted to France and to dates in
    ['2013-07-31', '2014-07-31').  The test makes no assertion on the
    optimised parameters; it only checks that the fit runs to completion.
    """
    _, _, team_data, match_data = first_data_preparation()

    countries = ['France', ]
    min_date = '2013-07-31'
    max_date = '2014-07-31'

    # NOTE(review): the bounds are plain strings, so these are string
    # comparisons -- presumably 'date' is still the raw text column here.
    in_scope = (match_data['league_country'].isin(countries)
                & (match_data['date'] >= min_date)
                & (match_data['date'] < max_date))
    # Explicit copy: columns are added below, and writing into a filtered
    # slice is a chained assignment (SettingWithCopyWarning).
    match_data = match_data.loc[in_scope].copy()

    # Team universe: every team appearing as home or away side in the
    # selected matches.
    plays_home = team_data['team_api_id'].isin(match_data['home_team_api_id'])
    plays_away = team_data['team_api_id'].isin(match_data['away_team_api_id'])
    team_universe = list(team_data[plays_home | plays_away]['team_long_name'])

    # Team names are used as ids: not the most efficient choice, but easier
    # to check by eye and to debug.
    team_id_to_name, _ = create_dict_involved_teams(match_data, team_data)
    # Element-wise lookup; an unknown api id still raises KeyError.
    match_data['home_team_id'] = match_data['home_team_api_id'].map(
        lambda api_id: team_id_to_name[api_id])
    match_data['away_team_id'] = match_data['away_team_api_id'].map(
        lambda api_id: team_id_to_name[api_id])

    model = DixonColes(team_universe)
    # The returned parameters are not inspected (smoke test only).
    model.optimize_parameters(match_data,
                              max_date,
                              verbose=VERBOSE,
                              home_goals_key='home_team_goal',
                              away_goals_key='away_team_goal')
def test_dixon_coles_multi_countries_prediction():
    """Run Dixon-Coles predictions over five leagues and save them to CSV.

    The full match history of France, England, Germany, Spain and Italy is
    passed as history; only matches dated on or after 2013-07-31 are
    predicted.  Predictions are written with ``np.savetxt``; the test makes
    no assertion on their values.
    """
    _, _, team_data, match_data = first_data_preparation()

    countries = ['France', 'England', 'Germany', 'Spain', 'Italy']
    min_date_str = '2013-07-31'

    # Filter by countries.  Explicit copy: columns are (re)written below,
    # and writing into a filtered slice is a chained assignment
    # (SettingWithCopyWarning).
    mask_countries = match_data['league_country'].isin(countries)
    match_data = match_data.loc[mask_countries].copy()

    # Convert the date strings to actual python dates (element-wise instead
    # of a row-wise DataFrame.apply).
    match_data['date'] = match_data['date'].map(
        lambda raw: datetime.strptime(raw, "%Y-%m-%d %H:%M:%S").date())

    # Team names are used as ids: not the most efficient choice, but easier
    # to check by eye and to debug.
    team_id_to_name, _ = create_dict_involved_teams(match_data, team_data)
    # An unknown api id still raises KeyError.
    match_data['home_team_id'] = match_data['home_team_api_id'].map(
        lambda api_id: team_id_to_name[api_id])
    match_data['away_team_id'] = match_data['away_team_api_id'].map(
        lambda api_id: team_id_to_name[api_id])

    # Keep the full match history; match_data is not modified past this
    # point, so sharing the object is safe.
    full_history = match_data

    # Only recent matches are predicted.
    min_date = datetime.strptime(min_date_str, "%Y-%m-%d").date()
    matches_to_predict = match_data[match_data['date'] >= min_date]

    print("nb matches full history", full_history.shape[0])
    print("nb matches_to_predict", matches_to_predict.shape[0])

    # Weight function: exponential decay in the elapsed time between two
    # dates, at a rate of 0.3 per year of 365.25 days.
    dixon_coles_params = {
        'weight_fct': lambda t1, t2: np.exp(-0.3 * (t2 - t1).days / 365.25)
    }

    predictions = dixon_coles_predictions(
        matches_to_predict,
        full_history,
        dixon_coles_params=dixon_coles_params,
        verbose=1)

    # Save predictions.
    # NOTE(review): the destination is a hard-coded absolute path; the
    # directory must already exist on the machine running the test.
    np.savetxt("D:/Football_betting/predictions/" +
               "dixon_coles_predictions.csv",
               predictions,
               delimiter=',',
               fmt='%.6e')
def test_case_dixon_coles_one_country_predictions():
    """Fit Dixon-Coles on French matches, predict recent ones, simulate bets.

    Steps:
      1. keep French matches and parse their dates;
      2. fit and predict the matches dated on or after 2016-04-30, using the
         whole French history as calibration data;
      3. analyse predictions against the B365H / B365D / B365A quotes;
      4. apply four investment strategies to the matches whose quotes
         contain no NaN and print a recap for each.

    The test makes no assertion; results are only reported through
    ``printv``.
    """
    _, _, team_data, match_data = first_data_preparation()

    countries = ['France', ]
    min_date = datetime.strptime('2016-04-30', "%Y-%m-%d").date()

    # Explicit copy: columns are (re)written below, and writing into a
    # filtered slice is a chained assignment (SettingWithCopyWarning).
    mask_countries = match_data['league_country'].isin(countries)
    match_data = match_data[mask_countries].copy()

    # Convert the date strings to actual python dates (element-wise instead
    # of a row-wise DataFrame.apply).
    match_data['date'] = match_data['date'].map(
        lambda raw: datetime.strptime(raw, "%Y-%m-%d %H:%M:%S").date())

    # Team names are used as ids: not the most efficient choice, but easier
    # to check by eye and to debug.
    team_id_to_name, _ = create_dict_involved_teams(match_data, team_data)
    # An unknown api id still raises KeyError.
    match_data['home_team_id'] = match_data['home_team_api_id'].map(
        lambda api_id: team_id_to_name[api_id])
    match_data['away_team_id'] = match_data['away_team_api_id'].map(
        lambda api_id: team_id_to_name[api_id])

    # Team universe (teams the parameters are calibrated on): every team
    # appearing as home or away side in the selected matches.
    plays_home = team_data['team_api_id'].isin(match_data['home_team_api_id'])
    plays_away = team_data['team_api_id'].isin(match_data['away_team_api_id'])
    team_universe = list(team_data[plays_home | plays_away]['team_long_name'])
    printv(1, VERBOSE, len(team_universe), team_universe)
    printv(1, VERBOSE, 'nb matches', match_data.shape[0])

    # Keep the full history before narrowing down to the matches to predict.
    full_history = match_data
    match_data = match_data[match_data['date'] >= min_date]

    # Exponential decay in the elapsed time between two dates, at a rate of
    # 0.3 per year of 365.25 days.
    def exp_weight_fct(t1, t2):
        return np.exp(-0.3 * (t2 - t1).days / 365.25)

    model = DixonColes(team_universe, weight_fct=exp_weight_fct)
    printv(1, VERBOSE,
           " ... fit dixon coles parameters and predict match outcomes ... ")
    predictions, param_histo = model.fit_and_predict(
        match_data,
        full_history,
        nb_obs_years=1,
        verbose=VERBOSE,
        home_goals_key='home_team_goal',
        away_goals_key='away_team_goal')
    printv(1, VERBOSE, " ... match outcomes predicted ... \n")

    match_outcomes = match_outcomes_hot_vectors(match_data)

    # Bookmaker quotes, relabelled W / D / L (home win, draw, away win
    # columns respectively); the index of match_data is preserved.
    bkm_quotes = pd.DataFrame({
        'W': match_data['B365H'],
        'D': match_data['B365D'],
        'L': match_data['B365A'],
    })

    analysis = analyze_predictions(match_outcomes,
                                   predictions,
                                   bkm_quotes,
                                   verbose=VERBOSE,
                                   nb_max_matchs_displayed=40,
                                   fully_labelled_matches=match_data)
    # Unpacking doubles as a check on the structure of the returned value.
    model_log_loss, model_rps, (log_loss_comparison_l,
                                rps_comparison_l) = analysis

    # Drop every match whose quotes contain a NaN, consistently across
    # quotes, outcomes and predictions.
    remove_nan_mask = [
        not contain_nan(quotes_row) for _, quotes_row in bkm_quotes.iterrows()
    ]
    bkm_quotes_r = bkm_quotes.iloc[remove_nan_mask]
    match_outcomes_r = match_outcomes.iloc[remove_nan_mask]
    predictions_r = predictions[remove_nan_mask]

    strategies = (
        # invest 1 in each match (if expected return > 1% actually)
        ConstantAmountInvestStrategy(1.),
        # stdDev of each bet is 1% of wealth
        ConstantStdDevInvestStrategy(0.01),
        # Kelly's ratio investment to maximize wealth long term return
        KellyInvestStrategy(),
        # invest 1% of money each time
        ConstantPercentInvestStrategy(0.01),
    )

    for invest_stgy in strategies:
        printv(1, VERBOSE, "\n#### results for ",
               invest_stgy.__class__.__name__, "####")
        init_wealth = 100
        df_recap_stgy = invest_stgy.apply_invest_strategy(
            predictions_r,
            bkm_quotes_r,
            match_outcomes_r,
            init_wealth=init_wealth)

        printv(
            1, VERBOSE, df_recap_stgy[[
                'invested_amounts', 'exp_gain_amounts', 'gain_amounts'
            ]].sum())
        printv(1, VERBOSE, 'wealth: from', init_wealth, 'to',
               round(df_recap_stgy['wealth'].iloc[-1], 4))