Exemplo n.º 1
0
    def test_all_player_features(self):
        """Extract cluster features for every player and save them as CSV files.

        Iterates over all player documents in ascending ``id`` order, computes
        the offensive and defensive cluster features of each player for the
        2013 season, writes one CSV file per feature family under
        ``season_data/2013`` and asserts that both files exist afterwards.
        """
        year = 2013
        season = Season(year)

        all_players = players.find({}).sort('id', pymongo.ASCENDING)
        offensive_features = []
        defensive_features = []

        for player_data in all_players:
            player = Player(player_data['id'])
            # Single-argument print(...) behaves the same on Python 2 and 3,
            # unlike the print statement, which is a SyntaxError on Python 3.
            print('Extracting offensive features for {} from the {}'.format(player, season))
            offensive_features.append(player_ocluster_features(player, season))
            print('Extracting defensive features for {} from the {}'.format(player, season))
            defensive_features.append(player_dcluster_features(player, season))

        offensive_features = np.array(offensive_features)
        defensive_features = np.array(defensive_features)

        # Derive the output directory from the same year used to build the
        # season instead of repeating the literal.
        output_dir = os.path.join('season_data', str(year))
        o_features_file = os.path.join(output_dir, 'player_offense_features.csv')
        d_features_file = os.path.join(output_dir, 'player_defense_features.csv')

        o_header = 'id,ast_pct,ts_pct,orb_pct,usg,ortg,mp_pct'
        d_header = 'id,blk_pct,stl_pct,drb_pct,drtg,pf_pct,mp_pct'

        np.savetxt(o_features_file, offensive_features, delimiter=',', header=o_header)
        np.savetxt(d_features_file, defensive_features, delimiter=',', header=d_header)

        self.assertTrue(os.path.exists(o_features_file))
        self.assertTrue(os.path.exists(d_features_file))
Exemplo n.º 2
0
def test_all_player_features(self):
    """Collect the offensive cluster features of every player for 2013.

    Walks all player documents in ascending ``id`` order, computes the
    offensive cluster features of each one and stacks them into a NumPy
    array.
    """
    season = Season(2013)

    all_players = players.find({}).sort('id', pymongo.ASCENDING)
    offensive_features = []

    for player_data in all_players:
        player = Player(player_data['id'])
        # Single-argument print(...) behaves the same on Python 2 and 3,
        # unlike the print statement, which is a SyntaxError on Python 3.
        print('Extracting offensive features for {} from the {}'.format(player, season))
        offensive_features.append(player_ocluster_features(player, season))

    offensive_features = np.array(offensive_features)
Exemplo n.º 3
0
def extract_data(filename='shots.csv'):
    """Export every shot event (own goals excluded) to a CSV file.

    Builds id -> name lookup tables from the region, tournament, season,
    stage, team and player collections, then writes one row per shot event,
    most recent ``matchId`` first. Each row gets the resolved names, a 0/1
    ``isGoal`` flag and one column per entry in ``QUALIFIERS``.

    Args:
        filename: Path of the CSV file to create (overwritten if present).

    Raises:
        KeyError: If an event references a region, tournament, season, stage
            or team id that is missing from its lookup table, or has no
            ``qualifiers`` key.
    """
    region_names = {doc['regionId']: doc['name'] for doc in regions.find()}
    tournament_names = {doc['tournamentId']: doc['name'] for doc in tournaments.find()}
    season_names = {doc['seasonId']: doc['name'] for doc in seasons.find()}
    stage_names = {doc['stageId']: doc['name'] for doc in stages.find()}
    team_names = {doc['teamId']: doc['name'] for doc in teams.find()}
    player_names = {doc['playerId']: doc['name'] for doc in players.find()}

    shot_filter = {'isShot': True, 'isOwnGoal': {'$exists': False}}

    # The context manager guarantees the file is flushed and closed even if
    # a lookup fails part-way through the export.
    with open(filename, 'w', newline='\n') as f:
        writer = csv.DictWriter(f, fieldnames=FIELDNAMES + QUALIFIERS, extrasaction='ignore')
        writer.writeheader()

        for event in events.find(shot_filter).sort('matchId', -1):
            event['isGoal'] = 1 if event.get('isGoal') else 0
            event['Region'] = region_names[event['regionId']] if event.get('regionId') else None
            event['Tournament'] = tournament_names[event['tournamentId']] if event.get('tournamentId') else None
            event['Season'] = season_names[event['seasonId']] if event.get('seasonId') else None
            event['Stage'] = stage_names[event['stageId']] if event.get('stageId') else None
            event['Team'] = team_names[event['teamId']] if event.get('teamId') else None
            try:
                event['Player'] = player_names[event['playerId']] if event.get('playerId') else None
            except KeyError:
                # Unknown player: report it, hand the id to get_player and
                # leave the name empty for this row.
                player_id = event['playerId']
                print('Missing playerId: {}'.format(player_id))
                get_player(player_id)
                event['Player'] = None

            # A qualifier without an explicit value counts as a flag (1);
            # qualifiers absent from the event are written as 0.
            event_qualifiers = {q['type']['displayName']: q.get('value', 1) for q in event['qualifiers']}
            for qualifier in QUALIFIERS:
                event[qualifier] = event_qualifiers.get(qualifier, 0)

            writer.writerow(event)
Exemplo n.º 4
0
Arquivo: nba.py Projeto: DimosGu/nba
def calc_all_player_times(year, recompute=False):
    """Check computed time on court against the box score for every player.

    For each player (ascending ``id``) and each game returned by
    ``season.get_player_games_in_range``, the box score minutes are compared
    with the minutes derived from ``player.time_on_court``. Results that
    agree within half a minute are printed to stdout; anything else is
    reported on stderr.

    Args:
        year: Year passed to ``Season``; printed as ``year``-``year + 1``.
        recompute: Forwarded to ``player.time_on_court``.
    """

    class TimeComputationError(Exception):
        """Error type for playing-time mismatches (not raised in this function)."""

        def __init__(self, msg):
            self.msg = msg

        def __str__(self):
            return self.msg

    roster = players.find({}).sort('id', pymongo.ASCENDING)

    print('Loading the {}-{} NBA season'.format(year, year + 1))
    season = Season(year)
    print('Loaded season')
    print('Computing time on court for all players in all games...')

    for record in roster:
        player = Player(record['id'])
        for game in season.get_player_games_in_range(player):
            print('Calculating time on court for {} ({}) in {} ({})'.format(player, player.id, game, game.id))
            boxscore_minutes = game.player_boxscore(player)['totalSecondsPlayed'] / 60.0

            # With no box score minutes there is nothing to compute.
            computed_minutes = 0
            if boxscore_minutes > 0:
                stints = player.time_on_court(game, recompute=recompute)
                computed_minutes = compute_ts_length(stints, unit='minutes')

            if abs(computed_minutes - boxscore_minutes) <= 0.5:
                print('{} played {} minutes in {}'.format(player, round(computed_minutes, 3), game))
                continue

            # NOTE: discrepancies are only reported; no exception is raised.
            print('In computing playing time for {} ({}) in {} ({}):'.format(player, player.id, game, game.id),
                  file=sys.stderr)
            print('Discrepancy between computed time: {0:2.2f}, and boxscore time: {1:2.2f}'.format(computed_minutes, boxscore_minutes),
                  file=sys.stderr)
Exemplo n.º 5
0
def get_all_player_features(season, start_date=None, end_date=None, recompute=False):
    """Return a DataFrame of per-player cluster features, cached on disk as CSV.

    The features are stored under ``season_data/<season.season>/`` in a file
    named after the date range. When that file exists and ``recompute`` is
    false it is loaded and returned; otherwise the features are computed for
    every player (ascending ``id``), written to that file and returned.

    Args:
        season: Object with a ``season`` attribute; also passed to the
            feature extractors.
        start_date: Start of the range, or ``None``. Must support
            ``strftime`` when given.
        end_date: End of the range, or ``None``. Must support ``strftime``
            when given.
        recompute: When true, ignore any cached file and rebuild it.

    Returns:
        A ``pandas.DataFrame`` indexed by player ``id`` with the offensive
        columns followed by the defensive columns.
    """
    str_format = '%Y-%m-%d'
    # The signature allows None for either bound; fall back to a fixed label
    # instead of failing on None.strftime when building the file name.
    start_label = start_date.strftime(str_format) if start_date is not None else 'season-start'
    end_label = end_date.strftime(str_format) if end_date is not None else 'season-end'
    feature_file = 'player-features-from-{}-to-{}.csv'.format(start_label, end_label)
    path = os.path.join('season_data', str(season.season), feature_file)

    if os.path.exists(path) and not recompute:
        return pd.read_csv(path, index_col=0)

    o_header = ['ast_pct', 'ts_pct', 'orb_pct', 'usg', 'ortg', 'mp_pct']
    d_header = ['blk_pct', 'stl_pct', 'drb_pct', 'drtg', 'pf_pct']

    index = []
    all_features = []

    # The cursor is only needed on the compute path, so it is created here.
    for player_data in players.find({}).sort('id', pymongo.ASCENDING):
        player = Player(player_data['id'])
        index.append(player.id)
        o_features = player_ocluster_features(player, season, start_date, end_date)
        d_features = player_dcluster_features(player, season, start_date, end_date)

        # Drop the first element of both vectors and also the last defensive
        # value so the row matches o_header + d_header (presumably the id and
        # a repeated mp_pct -- confirm against the feature extractors).
        all_features.append(o_features[1:] + d_features[1:-1])

    player_features = pd.DataFrame(data=all_features, columns=o_header + d_header, index=index)
    player_features.index.name = 'id'
    player_features.to_csv(path)

    return player_features
Exemplo n.º 6
0
def extract_data(filename='shots.csv'):
    """Write all shot events (excluding own goals) to a CSV file.

    Loads id -> name mappings for regions, tournaments, seasons, stages,
    teams and players, then iterates the shot events in descending
    ``matchId`` order. Each event is enriched with the resolved names, a 0/1
    ``isGoal`` value and one column per name in ``QUALIFIERS`` before being
    written as a row.

    Args:
        filename: Destination CSV path; an existing file is overwritten.

    Raises:
        KeyError: If an event refers to a region, tournament, season, stage
            or team id with no entry in its mapping, or has no
            ``qualifiers`` key.
    """
    r = {region['regionId']: region['name'] for region in regions.find()}
    c = {
        tournament['tournamentId']: tournament['name']
        for tournament in tournaments.find()
    }
    s = {season['seasonId']: season['name'] for season in seasons.find()}
    g = {stage['stageId']: stage['name'] for stage in stages.find()}
    t = {team['teamId']: team['name'] for team in teams.find()}
    p = {player['playerId']: player['name'] for player in players.find()}

    # Using a context manager closes (and flushes) the file when the export
    # finishes or fails; the original handle was never closed.
    with open(filename, 'w', newline='\n') as f:
        writer = csv.DictWriter(f,
                                fieldnames=FIELDNAMES + QUALIFIERS,
                                extrasaction='ignore')
        writer.writeheader()

        for event in events.find({
                'isShot': True,
                'isOwnGoal': {
                    '$exists': False
                }
        }).sort('matchId', -1):
            event['isGoal'] = 1 if event.get('isGoal') else 0
            event['Region'] = r[event['regionId']] if event.get(
                'regionId') else None
            event['Tournament'] = c[event['tournamentId']] if event.get(
                'tournamentId') else None
            event['Season'] = s[event['seasonId']] if event.get(
                'seasonId') else None
            event['Stage'] = g[event['stageId']] if event.get(
                'stageId') else None
            event['Team'] = t[event['teamId']] if event.get(
                'teamId') else None
            try:
                event['Player'] = p[event['playerId']] if event.get(
                    'playerId') else None
            except KeyError:
                # The player id is not in the mapping: log it, pass it to
                # get_player and write the row without a player name.
                player_id = event['playerId']
                print('Missing playerId: {}'.format(player_id))
                get_player(player_id)
                event['Player'] = None

            # Qualifiers lacking a 'value' default to 1; qualifiers the
            # event does not carry are written as 0.
            event_qualifiers = {
                q['type']['displayName']: q.get('value', 1)
                for q in event['qualifiers']
            }
            for qualifier in QUALIFIERS:
                event[qualifier] = event_qualifiers.get(qualifier, 0)

            writer.writerow(event)