def run_experiment_0(exp, league, type_evaluation, **params):
    """
    Simulate the bets on the past seasons of a league and print the outcome.

    For every season (optionally restricted to params["season"]) the default
    predictor is asked for the predictions of each stage. Every predicted
    match having the three B365 odds is evaluated and collected in a
    StageBet. After each stage the cumulative balance (profit - invest) of
    the season is printed; the totals are printed at the end of the season.
    The iteration stops as soon as the current season is reached.

    :param exp: not used by this experiment
    :param league: league whose seasons and stages are iterated
    :param type_evaluation: evaluation type forwarded to StageBet and
        evaluate_bet; with 5 and 6 the bet odd is registered too
    :param params: extra options; "season" selects a single season and the
        whole dictionary is forwarded to predictor.predict
    :return: None
    """
    def as_printable(amount):
        # two decimals, with a comma as decimal separator
        return str(round(amount, 2)).replace(".", ",")

    predictor = Predictor.get_predictor()
    only_season = util.get_default(params, "season", None)

    for season in league.get_seasons():
        season_selected = util.is_None(only_season) or season == only_season
        if not season_selected:
            continue

        print(season)
        if season == util.get_current_season():
            break

        invest = 0
        profit = 0
        last_stage = league.get_stages_by_season(season)
        for stage in range(1, last_stage + 1):
            # KEY: match id   VALUE: <prediction, probability>
            predictions = predictor.predict(league, season, stage, **params)
            stage_bet = StageBet(stage, type_evaluation)

            for match_id, pair in predictions.items():
                if len(pair) == 0:
                    continue

                match = Match.read_by_match_id(match_id)
                odds_missing = (util.is_None(match.B365H)
                                or util.is_None(match.B365D)
                                or util.is_None(match.B365A))
                if odds_missing:
                    continue

                predicted_label = pair[0]
                prob = pair[1]
                bet_odd = get_bet_odd(predicted_label, match)
                m_invest, m_profit = evaluate_bet(
                    predictor, type_evaluation, match, predicted_label, prob)

                if type_evaluation == 6:
                    # type 6 registers every evaluated match
                    stage_bet.add_bet(prob, m_profit, bet_odd)
                elif m_invest == 1:
                    if type_evaluation == 5:
                        stage_bet.add_bet(prob, m_profit, bet_odd)
                    else:
                        stage_bet.add_bet(prob, m_profit)

            profit += stage_bet.get_profit()
            invest += stage_bet.get_invest()
            print(stage, "\t", as_printable(profit - invest))

        print("Final investment:\t", as_printable(invest))
        print("Final profit:\t", as_printable(profit))
def init_predictor():
    """
    Run the default predictor on the matches of the date given by
    util.get_date().

    The matches are ordered by date and, for each of them, the prediction of
    its (league, season, stage) is requested. The returned predictions are
    discarded here (presumably the call is only meant to warm up the
    predictor -- to be confirmed against the predictor implementation).

    :return: None
    """
    print("> Init default predictor")
    reference_date = util.get_date()
    found = Match.read_by_match_date(reference_date)
    by_date = sorted(found, key=lambda m: m.date)
    predictor = get_predictor()
    for current in by_date:
        predictor.predict(current.get_league(), current.season, current.stage)
def print_bet_odds(date):
    """
    Print one numbered line per match played on the given date.

    The text of each line is produced by get_match_event_out. When no match
    is found an attention message is printed instead.

    :param date: date used to look up the matches
    :return: None
    """
    GuiUtil.print_info("Bet odds of", date)
    printed = 0
    matches = Match.read_by_match_date(date, order_by_date=True)
    for position, match in enumerate(matches, start=1):
        GuiUtil.print_indent_answer(position, get_match_event_out(match), True)
        printed = position
    if printed == 0:
        GuiUtil.print_att("No match found", date)
def search_by_date():
    """
    Ask the user for a date and print the matches of that date.

    The matches are printed ordered by date, one numbered line each (the
    numbering starts from 1). When no match is found an attention message is
    printed instead.

    :return: None
    """
    date = GuiUtil.input_date_or_day_passed()
    found = sorted(Match.read_by_match_date(date), key=lambda m: m.date)
    if not found:
        GuiUtil.print_att("No match found in date", date)
        return

    position = 0
    for match in found:
        position += 1
        GuiUtil.print_indent_answer(position, get_printable_match(match), True)
def look_for_matches(self, date, force_parsing=False):
    """
    Download the page listing the matches of a date and crawl those that
    need it.

    Every league found in the page is checked through CrawlerLeague: only
    leagues of a managed country, with a name longer than 3 characters and
    resolvable to a league, are considered. Each of their matches is crawled
    (CrawlerMatch.parse_json) when parsing is forced or when something is
    missing in the DB.

    If the page cannot be downloaded the error is printed and the method
    returns without crawling anything.

    :param date: date of the matches, as a string (it is concatenated into
        the URL)
    :param force_parsing: when True every match is crawled again
    :return: None
    """
    print("Elaborating matches of the date:", date)
    matches_link = self.host_url_match + "/sport_events/1%2F"+date+"%2Fbasic_h2h%2F0%2F0/"
    log.debug("Looking for matches of date ["+date+"] at link ["+matches_link+"]")

    try:
        page = requests.get(matches_link).text
    except requests.RequestException as e:
        # Without the page there is nothing to parse: going on would only
        # fail later on the unbound "page" variable.
        print(e)
        return

    soup = BeautifulSoup(page, "html.parser")
    header_list = soup.find_all(
        'div', {'class': 'mx-default-header mx-text-align-left mx-flexbox-container '})
    body_list = soup.find_all(
        'div', {'class': 'mx-table mx-soccer mx-matches-table mx-group-by-stage mx-container mx-league mx-livescore-table'})

    for header, body in zip(header_list, body_list):
        # reading the league
        # Notice that the league is identified also with an attribute called "data-stage"
        league_name = str(header.a.string).strip()
        league_data_stage = header.a.attrs['data-stage']

        # check if this league corresponds to one of the managed ones
        cl = CrawlerLeague(league_name, league_data_stage)
        if not (cl.is_in_a_managed_country() and len(league_name) > 3):
            continue

        league = cl.get_league()
        if util.is_None(league):
            log.warning("Impossible to crawl this league [" + league_name + ", " + league_data_stage + "]")
            continue

        print("\t- Looking for the league [" + league.name + "]")
        season = cl.get_season()

        for div_event in body.find_all('div', {'class': 'mx-stage-events'}):
            # event corresponds to "match_api_id": it is the third
            # "-"-separated token of the fourth CSS class of the element
            event = str(div_event.attrs["class"][3]).split("-")[2]
            match = Match.read_by_match_api_id(event)

            # crawl when at least one of the following happens:
            #  - parsing is forced
            #  - match is not in the DB
            #  - formation of the teams are not in the DB
            #  - incidents of the match are not in the DB
            #  - home_team_api_id is not matched to any team in the DB
            #  - away_team_api_id is not matched to any team in the DB
            need_to_crawl = (force_parsing
                             or not match
                             or not match.are_teams_linedup()
                             or not match.are_incidents_managed()
                             or not match.get_home_team()
                             or not match.get_away_team())

            if need_to_crawl:
                log.debug("Need to crawl match ["+event+"]")
                cm = CrawlerMatch(match, league, event)
                cm.parse_json(season)
            else:
                log.debug("Not need to crawl match [" + event + "]")
def train_predict(self, stage):
    """
    Train the configured algorithm and evaluate it on the matches to predict.

    The algorithm is trained with self.matches / self.labels and then asked
    to predict self.matches_to_predict. For every predicted match the
    statistics kept in self.accuracy_by_team_dic are updated for both the
    home team (flag True) and the away team (flag False).

    :param stage: not used in the computation (it only appears in a
        disabled debug print)
    :return: fraction of predicted labels equal to the expected ones
    :raise MLException: with code 3 when the training raises a ValueError
    :raise ZeroDivisionError: when the algorithm returns no prediction
    """
    ml_alg = MachineLearningAlgorithm.get_machine_learning_algorithm(
        self.ml_alg_framework,
        self.ml_alg_method,
        self.matches,
        self.labels,
        data_description=self.matches_id,
        train_percentage=1,
        **self.ml_alg_params)

    try:
        ml_alg.train()
    except ValueError as ve:
        print(ve)
        # keep the original error attached as the cause
        raise MLException(3) from ve

    predicted_labels, probability_events = ml_alg.predict(self.matches_to_predict)

    accuracy = 0
    for predicted_label, _prob, label, match_id in zip(predicted_labels,
                                                       probability_events,
                                                       self.labels_to_predict,
                                                       self.matches_to_predict_id):
        if predicted_label == label:
            accuracy += 1

        # read the match once and resolve both names before any update
        match = Match.read_by_match_id(match_id)
        home_team_name = match.get_home_team().team_short_name
        away_team_name = match.get_away_team().team_short_name

        for team_name, home_flag in ((home_team_name, True), (away_team_name, False)):
            # Explicit membership test: a KeyError raised inside
            # next_prediction must not be mistaken for a missing team and
            # reset the statistics collected so far.
            if team_name not in self.accuracy_by_team_dic:
                self.accuracy_by_team_dic[team_name] = TeamPredictionAccuracy(team_name)
            self.accuracy_by_team_dic[team_name].next_prediction(predicted_label, label, home_flag)

    return accuracy/len(predicted_labels)
def read_by_team_api_id(team_api_id, season=None):
    """
    Return the list of players that play in the team identified by
    team_api_id. If season is set, only that season is considered.

    The result is looked up in (and stored into) the cache
    "PLAYER_BY_TEAM_API_ID"; single players go through the cache
    "PLAYER_BY_API_ID" and are read from the "Player" table when missing.
    Player api ids that are not set, or not found in the DB, are skipped.

    :param team_api_id: api id of the team
    :param season: season to consider; a falsy value is treated as ""
    :return: list of players
    """
    season = season if season else ""
    team_key = str(team_api_id) + "_" + season

    try:
        return Cache.get_element(team_key, "PLAYER_BY_TEAM_API_ID")
    except KeyError:
        # team not cached yet: build the list below
        pass

    players = []
    for player_api_id in Match.read_players_api_id_by_team_api_id(team_api_id, season):
        # if the player_api_id is not set --> continue
        if util.is_None(player_api_id):
            continue

        try:
            player = Cache.get_element(player_api_id, "PLAYER_BY_API_ID")
        except KeyError:
            try:
                row = SQLLite.get_connection().select(
                    "Player", player_api_id=player_api_id)[0]
            except IndexError:
                log.warning("Player api id not found in DB [" + str(player_api_id) + "]")
                continue

            player = Player(row["id"])
            for column, value in row.items():
                setattr(player, column, value)
            Cache.add_element(player_api_id, player, "PLAYER_BY_API_ID")

        players.append(player)

    Cache.add_element(team_key, players, "PLAYER_BY_TEAM_API_ID")
    return players
def predict(self, league, season, stage):
    """
    Return the predictions of the matches of a league in a season and stage.

    Predictions already computed are cached in self.predictions, grouped by
    league id. If the cache holds at least one prediction for the requested
    season and stage those are returned; otherwise the algorithm is trained,
    the matches are predicted and the cache is updated.

    The returned dictionary contains only match ids as keys (no entry keyed
    by the league id is added).

    :param league: league of the matches (its id is the cache key)
    :param season: season of the matches
    :param stage: stage of the matches
    :return: dictionary match id -> [prediction, probability]; an empty
        dictionary when the prediction fails for any reason
    """
    league_predictions = self.predictions.setdefault(league.id, dict())

    this_predictions = dict()
    try:
        for match_id, p in league_predictions.items():
            match = Match.read_by_match_id(match_id)
            if match.season == season and match.stage == stage:
                this_predictions[match_id] = p
    except KeyError:
        # best effort: keep whatever has been collected so far
        pass

    if len(this_predictions) > 0:
        return this_predictions

    try:
        matches, labels, matches_id, matches_to_predict, matches_to_predict_id, labels_to_predict = \
            mli.get_input_to_train(self.ml_train_input_id,
                                   league,
                                   self.ml_train_input_representation,
                                   stage,
                                   self.ml_train_stages_to_train,
                                   season)

        ml_alg = mla.get_machine_learning_algorithm(self.ml_alg_framework,
                                                    self.ml_alg_method,
                                                    matches,
                                                    labels,
                                                    matches_id,
                                                    train_percentage=1,
                                                    )
        ml_alg.train()
        predicted_labels, probability_events = ml_alg.predict(matches_to_predict)

        for match_id, prediction, probability in zip(matches_to_predict_id,
                                                     predicted_labels,
                                                     probability_events):
            this_predictions[match_id] = [prediction, probability]
            league_predictions[match_id] = [prediction, probability]
        return this_predictions

    except Exception:
        # Deliberate best effort: any failure while building the input,
        # training or predicting results in "no prediction available".
        return {}
def get_matches(self, season=None, ordered=True, stage=None):
    """
    Return the matches read for this player through its player_api_id.

    :param season: when set, keep only the matches of this season
    :param ordered: when True, the matches are sorted by stage
    :param stage: when set, keep only the matches whose stage is strictly
        lower than this value
    :return: list of matches; an empty list if player_api_id is not set
    """
    if util.is_None(self.player_api_id):
        return []

    played = Match.read_by_player_api_id(self.player_api_id)
    if season:
        played = list(filter(lambda m: m.season == season, played))
    if stage:
        played = list(filter(lambda m: m.stage < stage, played))
    if not ordered:
        return played
    return sorted(played, key=lambda m: m.stage)
def predict_by_date():
    """
    Ask the user for a date and print the predictions of the matches of that
    date that are not finished yet.

    Nothing is done when check_setting_current_predictor() returns -1. The
    predictor is built from the module-level settings (ml_alg_framework,
    ml_alg_method, ml_train_input_id, ml_train_input_representation,
    ml_train_stages_to_train), which are only read here.

    A match without a usable prediction (missing or empty entry) is reported
    with a warning and skipped. If no prediction is printed at all, a
    "NOT FOUND" answer is shown.

    :return: None
    """
    if check_setting_current_predictor() == -1:
        return

    date = GuiUtil.input_date_or_day_passed()
    matches = sorted(Match.read_by_match_date(date), key=lambda match: match.date)

    if len(matches) == 0:
        GuiUtil.print_att("No match found in date", date)
        return

    GuiUtil.print_ans("Prediction by date", date)
    pi = 1
    for match in matches:
        if match.is_finished():
            continue

        predictor = Predictor.get_predictor(ml_alg_framework,
                                            ml_alg_method,
                                            ml_train_input_id,
                                            ml_train_input_representation,
                                            ml_train_stages_to_train)
        prediction_by_league = predictor.predict(match.get_league(),
                                                 match.season,
                                                 match.stage)

        # Only the lookup is guarded: an error raised while printing must
        # not be reported as a missing prediction.
        try:
            pair = prediction_by_league[match.id]
        except KeyError:
            pair = None

        # An empty pair carries no prediction: skip it instead of failing
        # on the unpacking below.
        if pair is None or len(pair) == 0:
            log.warning("Not possible to predict [" + str(match.id) + ": "
                        + match.get_home_team().team_long_name + " vs "
                        + match.get_away_team().team_long_name + "]")
            continue

        prediction, probability = pair
        prediction_str = get_printable_prediction(match, prediction, probability)
        GuiUtil.print_indent_answer(pi, prediction_str, True)
        pi += 1

    if pi == 1:
        GuiUtil.print_ans("Matches to predict", "NOT FOUND")
def run_experiment_4(exp, league, predictor=None, **params):
    """
    Compare, season by season, the odds implied by the predictions with the
    B365 odds of the bookmaker.

    The seasons from the fifth one on (league.get_seasons()[4:]) are
    considered. For every predicted match with the three B365 odds set and
    different from 0, the implied odd (1 / probability) is compared with the
    B365 odd of the predicted label (1 -> B365H, 0 -> B365D, 2 -> B365A).
    For each season the average absolute distance is printed for the labels
    1, 0 and 2, in this order.

    :param exp: not used by this experiment
    :param league: league whose seasons and stages are iterated
    :param predictor: predictor to use; when None the default one is obtained
        with Predictor.get_predictor() at call time
    :param params: extra options forwarded to predictor.predict
    :return: None
    """
    # Resolved here and not in the signature: a default expression is
    # evaluated only once, when the function is defined.
    if predictor is None:
        predictor = Predictor.get_predictor()

    for season in league.get_seasons()[4:]:
        distance = {i: 0 for i in [0, 1, 2]}
        number_bet_odds = {i: 0 for i in [0, 1, 2]}
        print(season)

        for stage in range(1, league.get_stages_by_season(season) + 1):
            stage_predictions = predictor.predict(league, season, stage, **params)

            for match_id, pair in stage_predictions.items():
                if len(pair) == 0:
                    continue

                match = Match.read_by_match_id(match_id)
                if util.is_None(match.B365H) or util.is_None(match.B365D) or util.is_None(match.B365A)\
                        or match.B365H == 0 or match.B365D == 0 or match.B365A == 0:
                    continue

                predicted_label, prob = pair[0], pair[1]
                our_bet_odds = 1 / prob

                # bookmaker odd of the predicted outcome (-1 if the label is unknown)
                bm_bet_odds = {1: match.B365H,
                               0: match.B365D,
                               2: match.B365A}.get(predicted_label, -1)

                distance[predicted_label] += math.fabs(our_bet_odds - bm_bet_odds)
                number_bet_odds[predicted_label] += 1

        # max(..., 1) avoids the division by zero for labels never predicted
        for label in (1, 0, 2):
            print(label, distance[label] / max(number_bet_odds[label], 1))
def get_best_team_predicted(self, league, season, stage, n_teams_returned=3):
    """
    Return the teams whose matches have been predicted correctly most often
    in the stages preceding the given one.

    The self.ml_train_stages_to_train stages before `stage` are walked
    backwards; when stage 1 of a season has been passed, the walk continues
    from the last stage of the previous season. For every match predicted
    correctly (prediction equal to MLUtil.get_label(match)) the counters of
    both its home and away team are increased.

    :param league: league of the matches
    :param season: season of the starting stage, in the form "YYYY/YYYY"
    :param stage: starting stage (excluded from the evaluation)
    :param n_teams_returned: maximum number of teams returned
    :return: list of teams (read with Team.read_by_team_api_id) ordered by
        decreasing number of correct predictions; teams with the same count
        keep the order in which they were first counted
    """
    def step_back(current_season, current_stage):
        # Stage 1 has no previous stage in its own season: move to the last
        # stage of the previous season.
        if current_stage - 1 == 0:
            y = int(current_season.split("/")[0]) - 1
            previous_season = str(y) + "/" + str(y + 1)
            return previous_season, league.get_stages_by_season(previous_season)
        return current_season, current_stage - 1

    best_teams = dict()
    s, stage_predictions = season, stage
    i = 1
    while i <= self.ml_train_stages_to_train:
        s, stage_predictions = step_back(s, stage_predictions)
        predictions = self.predict(league, s, stage_predictions)

        for match_id, pair in predictions.items():
            if len(pair) == 0:
                continue
            match = Match.read_by_match_id(match_id)
            pred_label = pair[0]
            if pred_label == MLUtil.get_label(match):
                util.increase_dict_entry(match.home_team_api_id, best_teams)
                util.increase_dict_entry(match.away_team_api_id, best_teams)
        i += 1

    ranked = heapq.nlargest(n_teams_returned,
                            best_teams.items(),
                            key=lambda entry: entry[1])
    return [Team.read_by_team_api_id(team_api_id) for team_api_id, _ in ranked]
def get_matches(self, season=None, ordered=True, date=None, finished=None, stage=None):
    """
    Return the matches belonging to this league.

    :param season: season forwarded to Match.read_matches_by_league
    :param ordered: when True, the matches are sorted by stage
    :param date: when set, keep only the matches whose date starts with it
    :param finished: when set and true, keep only the finished matches
    :param stage: when set, keep only the matches of this stage
    :return: list of matches
    :raise TypeError: when the matches cannot be ordered by stage (the error
        is logged and raised again)
    """
    matches = Match.read_matches_by_league(self.id, season)

    if ordered:
        try:
            matches = sorted(matches, key=lambda match: match.stage)
        except TypeError:
            # str(season): season is None by default, and concatenating it
            # would raise a new TypeError hiding the one being reported
            log.error("Impossible to order match of the league [" + str(self.id)
                      + "], of the season [" + str(season) + "] by stage")
            raise

    if not util.is_None(finished) and finished:
        matches = [m for m in matches if m.is_finished()]

    if stage:
        matches = [m for m in matches if m.stage == stage]

    if date:
        matches = [m for m in matches if m.date.startswith(date)]

    return matches
def parse_json(self, season):
    """
    Persist the match described by self.json_match.

    The attributes of the match are collected from the json, the lineups and
    the incidents are crawled when needed, and the match is finally written
    as a new match (when self.match is not set) or updated. The names
    returned by check_team for the two teams are printed at the end.

    :param season: season stored with the match
    :return: None
    """
    # Only the fields read below are taken from the json; the others
    # (status, tournament, cards, ...) are not used here.

    # Goals of the match: entry "1" of the results is read for the home
    # team and entry "2" for the away team (0 when the value is missing)
    results = self.json_match["results"]
    n_home_goal = util.get_default(results["1"]["r"], "1", 0)
    n_away_goal = util.get_default(results["2"]["r"], "1", 0)

    # Represent a match to persist with a dictionary
    match_attributes = {
        "country_id": self.league.country_id,
        "league_id": self.league.id,
        "season": season,
        "stage": self.json_match["round"],  # stage of a match
        "date": self.json_match["startdate"],
        "match_api_id": self.json_match["eventfk"],
        "home_team_api_id": self.json_match["homefk"],
        "away_team_api_id": self.json_match["awayfk"],
        "home_team_goal": n_home_goal,
        "away_team_goal": n_away_goal,
    }

    # check team
    home_team_name = check_team(self.json_match["homefk"])
    away_team_name = check_team(self.json_match["awayfk"])

    # formations to be crawled if:
    #  - the match is not stored in the DB
    #  - teams formation are not stored in the DB
    #  - match is not finished
    lineups_needed = (not self.match
                      or not self.match.are_teams_linedup()
                      or not self.match.is_finished())
    if lineups_needed:
        CrawlerLineup(self.match, match_attributes, self.event).get_lineups()

    # event incidents to be crawled if:
    #  - the match is not stored in the DB
    #  - some information are missing
    if not self.match or not self.match.are_incidents_managed():
        CrawlerIncidents(self.match, match_attributes, self.event).get_incidents()

    if self.match:
        # update match
        Match.update_match(self.match, match_attributes)
    else:
        # persist match
        Match.write_new_match(match_attributes)

    print("\t", home_team_name, "vs", away_team_name)