示例#1
0
def add_rpm_to_player_table(year):
    """Copy a season's ORPM/DRPM/RPM values from CSV onto player documents.

    Reads ``<year_string>/player_rpms_<year>_rpms.csv`` (the directory comes
    from ``construct_year_string``), skips the header row, and for every
    team code in a row's third column (codes are separated by ``/``) updates
    the ``db.players`` document whose ``player_index`` matches the resolved
    name stub, team code and season.  Documents are only updated, never
    inserted (``upsert=False``).

    Args:
        year: Season year as an int or numeric string.  Values below 2014
            are rejected with a message and no file is opened.

    Returns:
        None.

    Raises:
        ValueError: if ``year`` or one of the RPM columns is not numeric.
    """
    if int(year) < 2014:
        print("No RPM data available prior to 2013-14 season")
        return
    year_string = construct_year_string(year)
    rpm_filename = "{}/player_rpms_{}_rpms.csv".format(year_string, year)
    # newline="" is what the csv module asks for when opening files for it.
    with open(rpm_filename, "r", newline="") as csvfile:
        rows = csv.reader(csvfile)
        next(rows, None)  # skip header row
        for row in rows:
            # csv.reader yields [] for blank lines; also skip unnamed rows.
            if not row or not row[0]:
                continue
            source_name = row[0]
            rpm_update = {
                "orpm": float(row[5]),
                "drpm": float(row[6]),
                "rpm": float(row[7])
            }
            for code in row[2].split("/"):
                if code in common_utils.team_codes_to_bball_ref_codes:
                    code = common_utils.team_codes_to_bball_ref_codes[code]

                # Always resolve from the name as written in the CSV; feeding
                # the already-converted name back in for a traded player's
                # second team would resolve the wrong input.
                player_name = common_utils.nba_com_player_name_to_bball_ref_player_name(
                    source_name, code, year)
                player_stub_name = common_utils.player_to_stub_name(
                    player_name)
                db.players.update_one(
                    {
                        'player_index.name_stub': player_stub_name,
                        'player_index.team': code,
                        'player_index.season': year
                    }, {'$set': rpm_update}, upsert=False)
                print("Added RPM for {}".format(player_name))
示例#2
0
def import_players_into_mongo(year):
    """Upsert one season of advanced player stats from JSON into ``db.players``.

    Ensures the unique compound index on ``player_index.name_stub``,
    ``player_index.season`` and ``player_index.team`` exists, then loads
    ``<year_string>/players_advanced_<year>.json``.  The first JSON entry is
    the header row; its second element is dropped and a ``season`` header is
    appended, while ``year`` is appended to every data row.  Each value is
    stored as a float when it converts cleanly and unchanged otherwise.

    Args:
        year: Season year; used in the file name, appended to each row and
            used as ``player_index.season`` in the upsert filter.

    Returns:
        None.
    """
    year_string = construct_year_string(year)
    print('creating index')
    db.players.create_index([('player_index.name_stub', pymongo.ASCENDING),
                             ('player_index.season', pymongo.ASCENDING),
                             ('player_index.team', pymongo.ASCENDING)],
                            unique=True)

    pprint(db.players.index_information())
    print('index created (or already existed)')

    # Only the parse needs the file handle; close it before the DB writes.
    with open("{}/players_advanced_{}.json".format(year_string, year),
              "r") as playerfile:
        player_json = json.load(playerfile)

    headers = player_json.pop(0)
    headers.pop(1)
    headers.append(("Season", "season", None))

    for player_row in player_json:
        player_row.append(year)
        mongo_row = {}
        for column, value in enumerate(player_row):
            # The second item of each header tuple is the Mongo field name.
            field_name = headers[column][1]
            try:
                mongo_row[field_name] = float(value)
            except (TypeError, ValueError):
                # Non-numeric text (ValueError) and JSON null / nested
                # values (TypeError) are stored unchanged.
                mongo_row[field_name] = value
        mongo_row["player"] = mongo_row["player"].replace("*", "")
        stub_name = common_utils.player_to_stub_name(mongo_row["player"])

        print("import {}_{}_{}".format(stub_name, mongo_row["team_id"],
                                       year))

        db.players.update_one(
            {
                'player_index.name_stub': stub_name,
                'player_index.team': mongo_row["team_id"].strip(),
                'player_index.season': year
            }, {'$set': mongo_row}, upsert=True)
示例#3
0
def store_player_and_possession_data_for_matrix(year, season_type):
    """Index the players seen in a season's possessions and store their counts.

    Walks ``db.possessions[season_type][year_string]`` (capped by the
    module-level ``limit``), assigns each player/team string that
    ``common_utils.player_exists`` recognises a running ``index`` and counts
    the later possessions it appears in.  Afterwards every indexed player
    gets ``<season_type>_possessions`` upserted on their ``db.players``
    document, and for ``"regular_season"`` the whole mapping is saved as
    ``player_info`` on the season document.

    For ``"playoffs"``, a player that cannot be found is appended to the
    stored ``player_info`` with the next free index.  Each unresolved player
    is reported (and, in the playoffs, registered) once.

    Args:
        year: Season year (int or numeric string).
        season_type: Collection/field prefix; ``"regular_season"`` and
            ``"playoffs"`` receive the special handling described above.

    Returns:
        None.
    """

    def to_bball_ref_team(code):
        # Before 1998 the existence check looks Washington up as WSB; the
        # same mapping must be used when writing so both hit one document.
        if code == "WAS" and year_as_int < 1998:
            return "WSB"
        return code

    player_info = {}
    year_string = common_utils.construct_year_string(year)
    year_as_int = int(year)
    possessions = db.possessions[season_type][year_string].find().limit(limit)
    count = 0
    player_index = 0
    num_possessions = possessions.count()
    games_data = db.seasons.find_one({"year_string": year_string})["{}_games_data".format(season_type)]

    # player info should already be calculated, this is to get index
    if season_type == "playoffs":
        num_players = len(db.seasons.find_one({"year_string": year_string})["player_info"])

    # player/team strings that failed the lookup; remembered so that a
    # playoff-only player is registered once instead of once per possession
    # (which used to hand out a new index on every sighting).
    unresolved = set()

    for possession in possessions:
        count += 1
        if count % 20000 == 0:
            print("Poss count {}/{}".format(count, num_possessions))
        lineups_with_team_names = convert_lineups_to_player_team_strings(possession, games_data)
        for lineup_type in ("home_lineup", "away_lineup"):
            for player_team in lineups_with_team_names[lineup_type]:
                player_name, team_code = player_team.split("_")
                if player_name == "None":
                    continue

                # player was already seen in a lineup
                if player_team in player_info:
                    player_info[player_team]["possessions"] += 1
                    continue
                if player_team in unresolved:
                    continue

                stub_name = common_utils.player_to_stub_name(player_name)
                team_code = to_bball_ref_team(team_code)

                # found the player in bball ref database
                if common_utils.player_exists(stub_name, team_code, year_as_int):
                    # NOTE(review): the possession in which a player is first
                    # seen is not counted (starts at 0) - confirm intended.
                    player_info[player_team] = {
                        "index": player_index,
                        "possessions": 0,
                        "stub_name": stub_name
                    }
                    player_index += 1
                    continue

                unresolved.add(player_team)
                print("{}_{}_{} not found.".format(stub_name, team_code, year_as_int))
                if season_type != "playoffs":
                    continue

                print("\n{}_{}_{} played in playoffs and not in regular season.".format(stub_name, team_code, year_as_int))
                print("[{}][{}]: {}".format(
                    possession["possession_metadata"]["gid"],
                    possession["possession_metadata"]["event_num"],
                    possession["possession_metadata"]["message"]
                ))
                print("\tHome", lineups_with_team_names["home_lineup"])
                print("\tAway", lineups_with_team_names["away_lineup"])
                db.seasons.find_one_and_update(
                    {"year_string": year_string},
                    {"$set":
                        {"player_info.{}".format(player_team):
                            {
                                "index": num_players,
                                "possessions": 0,
                                "stub_name": stub_name
                            }
                        }
                    }
                )
                num_players += 1

    for player_team, info in player_info.items():
        _, team_code = player_team.split("_")
        db.players.update_one(
            filter={
                "player_index.name_stub": info["stub_name"],
                "player_index.team": to_bball_ref_team(team_code),
                "player_index.season": year_as_int
            },
            update={
                "$set": {
                    "{}_possessions".format(season_type): info["possessions"]
                }
            },
            upsert=True
        )

    if season_type == "regular_season":
        db.seasons.find_one_and_update(
            {"year_string": year_string},
            {"$set": {"player_info": player_info}}
        )
示例#4
0
def calculate_rapm(year, X, Y, metric="rapm", season_type="regular_season"):
    """Fit a linear model on (X, Y) and store per-player offensive/defensive values.

    The estimator and the scale applied to its coefficients are chosen by
    ``metric``.  Coefficients are paired, in iteration order, with the keys
    of the season's stored ``player_info``: the scaled coefficient is the
    offensive value and the coefficient ``len(player_info)`` positions
    later, scaled and negated, is the defensive value.  Both and their sum
    are written to the matching ``db.players`` documents under
    ``o<metric>_<season_type>``, ``d<metric>_<season_type>`` and
    ``<metric>_<season_type>``.  Finally up to 50 players of that season
    with more than 1000 ``possessions`` are printed, best first.

    Args:
        year: Season year (int or numeric string).
        X: Design matrix handed to ``fit``.
        Y: Target values handed to ``fit``.
        metric: One of ``"apm"``, ``"rapm"``, ``"rapm_enet_cv"``,
            ``"rapm_bayes_ridge"`` or ``"rapm_cv"``.
        season_type: Suffix used in the stored field names.

    Returns:
        The fitted estimator.

    Raises:
        RuntimeError: if ``metric`` is not one of the names above.
    """
    year_as_int = int(year)
    year_string = common_utils.construct_year_string(year)
    print('fitting model...')
    timestamp = time.perf_counter()

    # metric name -> (estimator factory, coefficient scale)
    model_specs = {
        "apm": (lambda: linear_model.LinearRegression(), 10),
        "rapm": (lambda: linear_model.Ridge(alpha=2900), 100),
        "rapm_enet_cv": (lambda: linear_model.ElasticNetCV(l1_ratio=.9, cv=3), 1),
        "rapm_bayes_ridge": (lambda: linear_model.BayesianRidge(), 1),
        "rapm_cv": (lambda: linear_model.RidgeCV(alphas=list(range(2600, 3000, 50)), cv=5), 100),
    }
    if metric not in model_specs:
        raise RuntimeError("{} not recognized".format(metric))
    build_model, multiplier = model_specs[metric]
    clf = build_model()

    clf.fit(X, Y)

    # The cross-validated estimators expose the hyper-parameters they chose.
    if metric in ("rapm_enet_cv", "rapm_cv"):
        print("alpha chosen: {}".format(clf.alpha_))
    if metric == "rapm_enet_cv":
        print("l1_ratio chosen: {}".format(clf.l1_ratio_))

    elapsed = time.perf_counter() - timestamp
    print("time took {} seconds".format(elapsed))
    all_players_dict = db.seasons.find_one({"year_string": year_string})["player_info"]

    print("coefficients", clf.coef_.shape)
    print("num of players", len(all_players_dict))
    offensive_values = clf.coef_ * multiplier
    defensive_values = clf.coef_[len(all_players_dict):] * -1 * multiplier
    rapm = list(zip(all_players_dict, offensive_values, defensive_values))

    print(all_players_dict)
    print(len(rapm))

    total_field = metric + "_" + season_type
    offense_field = "o" + total_field
    defense_field = "d" + total_field

    for player_key, offense, defense in rapm:
        player_name, team_code = player_key.split("_")
        stub_name = common_utils.player_to_stub_name(player_name)

        print ("updating {}_{}_{}".format(stub_name, team_code, year_as_int))
        db.players.update_one(
            {
                "player_index.name_stub": stub_name,
                "player_index.team": team_code,
                "player_index.season": year_as_int
            },
            {
                "$set": {
                    offense_field: offense,
                    defense_field: defense,
                    total_field: offense + defense
                }
            }
        )

    print("Sorted & filtered {} for {}".format(metric, year_string))
    poss_threshold = 1000
    ranked_players = db.players.find(
        filter={"player_index.season": year_as_int, "possessions": {"$gt": poss_threshold}},
        projection={"player": 1, total_field: 1},
        sort=[(total_field, -1)]
    )
    max_rankings = 50
    printed = 0
    for ranked in ranked_players:
        if "player" not in ranked:
            continue
        print("{:20} {:>10}".format(ranked["player"], ranked[total_field]))
        printed += 1
        if printed == max_rankings:
            break
    return clf
示例#5
0
def get_and_parse_event_list_for_lineups(event_list, team_info, year):
    """
    Take rowSet of events from API call and return a list of equal size with the 5 man lineup on the floor for each team for each event
    Example output format:
    [
        {
            "Lakers": ["Lonzo Ball", "Brandon Ingram", "Corey Brewer", "Julius Randle", "Brook Lopez"],
            "Warriors": ["Stephen Curry", "Kevin Durant", "Klay Thompson", "Draymond Green", "Andre Iguodala"],
            "event_num": 2
        }, ...
    ]

    Lineups are kept as sets and reset whenever the event's period changes.
    A player first seen part-way through a period is back-filled into every
    snapshot already recorded for that period.  The NBA.com id of every
    player seen is written to ``nba_com_id`` on the matching ``db.players``
    document.

    A player whose name cannot be resolved (the name converter raises
    ``RuntimeError``) is logged and skipped, so an unbound or stale name
    from an earlier event is never used.

    Args:
        event_list: Sequence of event rows, indexed through ``event_field``.
        team_info: Mapping with ``"home_team"`` and ``"away_team"`` keys;
            their values are the lineup keys of each snapshot.
        year: Season year (int or numeric string).

    Returns:
        One dict per event, holding both teams' lineup sets and
        ``"event_num"``.
    """
    player_ids = {}

    year_as_int = int(year)

    home_team = team_info["home_team"]
    away_team = team_info["away_team"]

    lineups = {home_team: set(), away_team: set()}

    event_with_lineups_list = []
    current_quarter = -1
    # Marks a name the converter could not resolve; a dedicated object so
    # that no legitimate return value can be mistaken for it.
    unresolved = object()

    for index, event in enumerate(event_list):
        # reset lineups when quarter changes
        period = event[event_field["PERIOD"]]
        if current_quarter != period:
            current_quarter = period
            lineups[home_team] = set()
            lineups[away_team] = set()
            quarter_start_index = index

        is_substitution = event[event_field["EVENTMSGTYPE"]] == event_type["SUB"]

        # A substitution without a message is ignored; this is for a single
        # empty event toward the end of this game:
        # http://stats.nba.com/game/0020000883/playbyplay/ that causes problems otherwise
        if is_substitution and get_message(event) is not None:
            # substitution event
            # player 1 is being substituted out
            # player 2 is coming in
            team = common_utils.convert_nba_espn_team_codes_to_bball_ref(
                event[event_field["PLAYER1_TEAM_ABBREVIATION"]], year_as_int)
            try:
                player_to_sub_out = common_utils.nba_com_player_name_to_bball_ref_player_name(
                    event[event_field["PLAYER1_NAME"]], team, year_as_int)
            except RuntimeError:
                print_event(event)
                player_to_sub_out = unresolved
            try:
                player_to_sub_in = common_utils.nba_com_player_name_to_bball_ref_player_name(
                    event[event_field["PLAYER2_NAME"]], team, year_as_int)
            except RuntimeError:
                print_event(event)
                player_to_sub_in = unresolved

            if player_to_sub_out is not unresolved:
                player_ids.setdefault((player_to_sub_out, team, year_as_int),
                                      event[event_field["PLAYER1_ID"]])
                if player_to_sub_out not in lineups[team]:
                    # backfill: he must have been on the floor since the
                    # start of the period without appearing in an event
                    for earlier in event_with_lineups_list[quarter_start_index:]:
                        earlier[team].add(player_to_sub_out)
                lineups[team].discard(player_to_sub_out)

            if player_to_sub_in is not unresolved:
                player_ids.setdefault((player_to_sub_in, team, year_as_int),
                                      event[event_field["PLAYER2_ID"]])
                lineups[team].add(player_to_sub_in)

        # Snapshot copies, so later lineup changes don't alter this entry
        # (back-filling edits the stored copies on purpose).
        event_with_lineups_list.append({
            home_team: lineups[home_team].copy(),
            away_team: lineups[away_team].copy(),
            "event_num": event[event_field["EVENTNUM"]],
        })

        if is_substitution:
            continue

        # player1, player2, player3
        name_columns = (
            event_field['PLAYER1_NAME'], event_field['PLAYER2_NAME'],
            event_field['PLAYER3_NAME']
        )
        for p_index in name_columns:
            # team code is always an offset of 4 from player name
            if not (event[p_index] and event[p_index + 4]):
                continue

            team_code = common_utils.convert_nba_espn_team_codes_to_bball_ref(
                event[p_index + 4], year_as_int)
            try:
                player_name = common_utils.nba_com_player_name_to_bball_ref_player_name(
                    event[p_index], team_code, year_as_int)
            except RuntimeError as e:
                print(e)
                continue

            # player id is offset -1 from player name
            player_ids.setdefault((player_name, team_code, year_as_int),
                                  event[p_index - 1])

            if len(lineups[team_code]) < 5 and player_name not in lineups[team_code]:
                lineups[team_code].add(player_name)

                # backfill (includes the snapshot just appended)
                for earlier in event_with_lineups_list[quarter_start_index:]:
                    earlier[team_code].add(player_name)

    for (t_player_name, t_team_code, t_year), nba_com_id in player_ids.items():
        stub_name = common_utils.player_to_stub_name(t_player_name)
        db.players.update_one(
            {
                "player_index.name_stub": stub_name,
                "player_index.team": t_team_code,
                "player_index.season": t_year
            }, {
                "$set": {
                    "nba_com_id": nba_com_id
                }
            })

    return event_with_lineups_list
示例#6
0
def calculate_weighted_average(year):
    """Store possession-weighted roster averages of player metrics per team.

    For every ``db.teams`` document of the season and for each of the four
    seasons ``year``, ``year - 1``, ``year - 2`` and ``year - 3``, every
    metric is averaged over the team's ``year`` roster, weighted by each
    player's ``possessions`` in ``year``.  A player's metric for the
    evaluated season is the possession-weighted mean of his stored rows for
    that season; when those rows total 750 possessions or fewer, the fixed
    default from ``metrics_averages`` is used instead.  Results are written
    to the team document as ``Y-<year_step>_<metric>``.

    Seasons before 1997 are skipped.  Roster entries lacking ``possessions``
    or ``player`` are added to ``players_without_possessions``, which is
    printed at the end.

    Args:
        year: Season year (int or numeric string).  It is used unchanged in
            the ``db.teams`` filters and converted to int for player lookups
            and year arithmetic.

    Returns:
        None.

    Raises:
        ValueError: if ``year`` is not numeric.
    """
    # NOTE(review): ``players_without_possessions`` is not defined in this
    # function; presumably a module-level set - verify.
    year_as_int = int(year)
    metrics_averages = {
        "per": 15.0,
        "bpm": 0.0,
        "ws_per_48": 0.100,
        "rapm": 0.0,
        "apm": 0.0
    }

    if year_as_int >= 2014:
        metrics_averages["rpm"] = 0.0

    teams = db.teams.find({"season": year})
    for team in teams:
        team_code = team["team_code"]
        # get the players to loop through
        players = list(db.players.find({
            "player_index.season": year_as_int,
            "player_index.team": team_code
        }))
        if not players and team_code in common_utils.team_renames:
            players = list(db.players.find({
                "player_index.season": year_as_int,
                "player_index.team": common_utils.team_renames[team_code]
            }))

        if not players:
            # Report the requested season; ``year_to_evaluate`` is not
            # assigned yet at this point.
            team_id = (team.get("team_index") or {}).get("team_id", team_code)
            print("could not find {}_{}".format(year_as_int, team_id))
            continue

        for year_step in range(4):
            timestamp = time.perf_counter()

            # int arithmetic so a string ``year`` works as well
            year_to_evaluate = year_as_int - year_step

            for metric in metrics_averages:
                total_possessions = 0
                weighted_average = 0

                print("{} for {} {}\n".format(metric, year_to_evaluate,
                                              team_code))
                if year_to_evaluate < 1997:
                    print("no data available for prior to 1997")
                    continue
                for player in players:
                    # original measure was 250 minutes
                    if "possessions" in player and "player" in player:
                        stub_name = common_utils.player_to_stub_name(
                            player["player"])
                        prev_season_player_lookup = db.players.find({
                            "player_index.season": year_to_evaluate,
                            "player_index.name_stub": stub_name
                        })

                        if not player_exists(stub_name, year_to_evaluate):
                            if player_minus_jr_exists(stub_name,
                                                      year_to_evaluate):
                                # drop the two-character suffix
                                stub_name = stub_name[:-2]
                                print("substracted suffix, found {}".format(
                                    stub_name))

                            if stub_name in common_utils.nba_com_stubs_to_bball_ref_stubs:
                                stub_name = common_utils.nba_com_stubs_to_bball_ref_stubs[
                                    stub_name]

                        # if player still doesn't exist, then idk
                        if not player_exists(stub_name, year_to_evaluate):
                            print(
                                "{}_{} not found in scraped bball_ref players page"
                                .format(stub_name, year_to_evaluate))
                        else:
                            prev_season_player_lookup = db.players.find({
                                "player_index.season": year_to_evaluate,
                                "player_index.name_stub": stub_name,
                                "player_index.team": team_code
                            })

                        metric_value = 0
                        prev_poss_count = 0

                        # Sum over all matching rows so that several entries
                        # for one player are combined.
                        for prev_player_season in prev_season_player_lookup:
                            if "possessions" not in prev_player_season:
                                # Adds no weight; the default is applied
                                # below when there is too little data.
                                continue
                            try:
                                season_possessions = prev_player_season[
                                    "possessions"]
                                weighted_metric = float(
                                    prev_player_season[metric]
                                ) * season_possessions
                            except (KeyError, TypeError, ValueError):
                                print("error trying to convert to float ",
                                      metric, prev_player_season)
                            else:
                                # Update both totals together so a bad row
                                # cannot add weight without a value.
                                prev_poss_count += season_possessions
                                metric_value += weighted_metric

                        if prev_poss_count > 750:
                            metric_value = metric_value / prev_poss_count
                        else:
                            metric_value = metrics_averages[metric]

                        try:
                            player_weighted = player[
                                "possessions"] * metric_value
                            total_possessions += player["possessions"]
                            weighted_average += player_weighted
                        except TypeError:
                            print(player)
                    else:
                        player_name = player.get(
                            "player") or "unknown_player_name"
                        # Separate name: ``team_code`` must keep identifying
                        # the team being processed for the queries and the
                        # update below.
                        player_team_id = player.get(
                            "team_id") or "unknown_team"
                        player_key = "{}_{}_{}".format(player_name, year,
                                                       player_team_id)
                        players_without_possessions.add(player_key)
                    if total_possessions == 0:
                        print(players)
                if total_possessions > 0:
                    weighted_average = weighted_average / total_possessions
                    print("{}_{} has a weighted average of {} {}\n".format(
                        team_code, year_to_evaluate, weighted_average, metric))

                    db.teams.update_one(
                        {
                            "team_index.season": year,
                            "team_index.team_id": team_code
                        }, {
                            "$set": {
                                "Y-{}_{}".format(year_step, metric):
                                weighted_average
                            }
                        })
            print("time took {} seconds for one year step".format(
                time.perf_counter() - timestamp))
    pprint(players_without_possessions)