Exemplo n.º 1
0
def getChampions(soup, season):
    """Append ``[team_id, season]`` to the module-level ``allChampions`` list.

    The team name is the text of the first ``<a>`` inside the paragraph
    matched by ``#meta > div:nth-child(2) > p`` (presumably the champion
    line of a season summary page -- confirm against the caller).

    Args:
        soup: Parsed page exposing ``select_one``.
        season: Season label stored unchanged next to the team id.
    """
    global allChampions
    champion_link = soup.select_one('#meta > div:nth-child(2) > p').select_one('a')
    # searchTeamByName is defined elsewhere; element 0 of its result is
    # converted to int and used as the team id.
    champion_id = int(searchTeamByName(champion_link.get_text())[0])
    allChampions.append([champion_id, season])
Exemplo n.º 2
0
def helperTwo(row, id, season):
    """Flatten one team row of opponent totals into a list.

    Args:
        row: Table-row element; must support ``select_one`` and is also
            handed to ``toIntOrFloat`` (defined elsewhere, presumably
            parsing the selected cell's text into a number).
        id: Record identifier, copied through as the first element.
        season: Season label, copied through as the third element.

    Returns:
        ``[id, team_id, season, g, mp, fg, fga, fg_pct, fg3, fg3a, fg3_pct,
        fg2, fg2a, fg2_pct, ft, fta, ft_pct, orb, drb, trb, ast, stl, blk,
        tov, pf, pts]`` where everything after ``mp`` is read from the
        ``opp_``-prefixed cells.
    """
    # Selectors in output order; the list position defines the column.
    stat_selectors = (
        '[data-stat="g"]',
        '[data-stat="mp"]',
        '[data-stat="opp_fg"]',
        '[data-stat="opp_fga"]',
        '[data-stat="opp_fg_pct"]',
        '[data-stat="opp_fg3"]',
        '[data-stat="opp_fg3a"]',
        '[data-stat="opp_fg3_pct"]',
        '[data-stat="opp_fg2"]',
        '[data-stat="opp_fg2a"]',
        '[data-stat="opp_fg2_pct"]',
        '[data-stat="opp_ft"]',
        '[data-stat="opp_fta"]',
        '[data-stat="opp_ft_pct"]',
        '[data-stat="opp_orb"]',
        '[data-stat="opp_drb"]',
        '[data-stat="opp_trb"]',
        '[data-stat="opp_ast"]',
        '[data-stat="opp_stl"]',
        '[data-stat="opp_blk"]',
        '[data-stat="opp_tov"]',
        '[data-stat="opp_pf"]',
        '[data-stat="opp_pts"]',
    )

    team_name = row.select_one('[data-stat="team_name"] > a').get_text()
    team_id = int(searchTeamByName(team_name)[0])
    stat_values = [toIntOrFloat(row, selector) for selector in stat_selectors]

    return [id, team_id, season] + stat_values
Exemplo n.º 3
0
def helperFive(row, id, season):
    """Flatten one team row of opponent shooting splits into a list.

    Args:
        row: Table-row element; must support ``select_one`` and is also
            handed to ``toIntOrFloat`` (defined elsewhere, presumably
            parsing the selected cell's text into a number).
        id: Record identifier, copied through as the first element.
        season: Season label, copied through as the third element.

    Returns:
        A list in this column order: id, team_id, season, g, mp, fg_pct,
        avg_dist, fg2a_ct_fga, pct_fga_00_03, pct_fga_03_10, pct_fga_10_16,
        pct_fga_16_xx, fg3a_pct_fga, fg2_pct, fg_pct_00_03, fg_pct_03_10,
        fg_pct_10_16, fg_pct_16_xx, fg3_pct, fg2_pct_ast, pct_fg2_dunk,
        fg2_dunk, pct_fg2_layup, fg2_layup, fg3_pct_ast, pct_fg3a_corner,
        fg3_pct_corner, fg3a_heave, fg3_heave. Everything after ``mp`` is
        read from the ``opp_``-prefixed cells.
    """
    # Selectors in output order; the list position defines the column.
    stat_selectors = (
        '[data-stat="g"]',
        '[data-stat="mp"]',
        '[data-stat="opp_fg_pct"]',
        '[data-stat="opp_avg_dist"]',
        '[data-stat="opp_fg2a_ct_fga"]',
        '[data-stat="opp_pct_fga_00_03"]',
        '[data-stat="opp_pct_fga_03_10"]',
        '[data-stat="opp_pct_fga_10_16"]',
        '[data-stat="opp_pct_fga_16_xx"]',
        '[data-stat="opp_fg3a_pct_fga"]',
        '[data-stat="opp_fg2_pct"]',
        '[data-stat="opp_fg_pct_00_03"]',
        '[data-stat="opp_fg_pct_03_10"]',
        '[data-stat="opp_fg_pct_10_16"]',
        '[data-stat="opp_fg_pct_16_xx"]',
        '[data-stat="opp_fg3_pct"]',
        '[data-stat="opp_fg2_pct_ast"]',
        '[data-stat="opp_pct_fg2_dunk"]',
        '[data-stat="opp_fg2_dunk"]',
        '[data-stat="opp_pct_fg2_layup"]',
        '[data-stat="opp_fg2_layup"]',
        '[data-stat="opp_fg3_pct_ast"]',
        '[data-stat="opp_pct_fg3a_corner"]',
        '[data-stat="opp_fg3_pct_corner"]',
        '[data-stat="opp_fg3a_heave"]',
        '[data-stat="opp_fg3_heave"]',
    )

    team_name = row.select_one('[data-stat="team_name"] > a').get_text()
    team_id = int(searchTeamByName(team_name)[0])
    stat_values = [toIntOrFloat(row, selector) for selector in stat_selectors]

    return [id, team_id, season] + stat_values
Exemplo n.º 4
0
def helperThree(row, id, season):
    """Flatten one team row of miscellaneous/advanced stats into a list.

    Args:
        row: Table-row element; must support ``select_one`` and is also
            handed to ``toIntOrFloat`` (defined elsewhere, presumably
            parsing the selected cell's text into a number).
        id: Record identifier, copied through as the first element.
        season: Season label, copied through as the third element.

    Returns:
        A list in this column order: id, team_id, season, age, wins, losses,
        wins_pyth, losses_pyth, mov, sos, srs, off_rtg, def_rtg, net_rtg,
        pace, fta_per_fga_pct, fg3a_per_fga_pct, ts_pct, efg_pct, tov_pct,
        orb_pct, ft_rate, opp_efg_pct, opp_tov_pct, drb_pct, opp_ft_rate,
        arena_name, attendance, attendance_per_g. ``arena_name`` is the raw
        cell text; every other stat goes through ``toIntOrFloat``.
    """
    team_id = int(
        searchTeamByName(
            row.select_one('[data-stat="team_name"] > a').get_text())[0])
    age = toIntOrFloat(row, '[data-stat="age"]')
    wins = toIntOrFloat(row, '[data-stat="wins"]')
    # Read the losses cell; this previously selected '[data-stat="g"]',
    # which is a different column and does not match the field name.
    losses = toIntOrFloat(row, '[data-stat="losses"]')
    wins_pyth = toIntOrFloat(row, '[data-stat="wins_pyth"]')
    losses_pyth = toIntOrFloat(row, '[data-stat="losses_pyth"]')
    mov = toIntOrFloat(row, '[data-stat="mov"]')
    sos = toIntOrFloat(row, '[data-stat="sos"]')
    srs = toIntOrFloat(row, '[data-stat="srs"]')
    off_rtg = toIntOrFloat(row, '[data-stat="off_rtg"]')
    def_rtg = toIntOrFloat(row, '[data-stat="def_rtg"]')
    net_rtg = toIntOrFloat(row, '[data-stat="net_rtg"]')
    pace = toIntOrFloat(row, '[data-stat="pace"]')
    fta_per_fga_pct = toIntOrFloat(row, '[data-stat="fta_per_fga_pct"]')
    fg3a_per_fga_pct = toIntOrFloat(row, '[data-stat="fg3a_per_fga_pct"]')
    ts_pct = toIntOrFloat(row, '[data-stat="ts_pct"]')
    efg_pct = toIntOrFloat(row, '[data-stat="efg_pct"]')
    tov_pct = toIntOrFloat(row, '[data-stat="tov_pct"]')
    orb_pct = toIntOrFloat(row, '[data-stat="orb_pct"]')
    ft_rate = toIntOrFloat(row, '[data-stat="ft_rate"]')
    opp_efg_pct = toIntOrFloat(row, '[data-stat="opp_efg_pct"]')
    opp_tov_pct = toIntOrFloat(row, '[data-stat="opp_tov_pct"]')
    drb_pct = toIntOrFloat(row, '[data-stat="drb_pct"]')
    opp_ft_rate = toIntOrFloat(row, '[data-stat="opp_ft_rate"]')
    # The arena name is free text, so it bypasses the numeric conversion.
    arena_name = row.select_one('[data-stat="arena_name"]').get_text()
    attendance = toIntOrFloat(row, '[data-stat="attendance"]')
    attendance_per_g = toIntOrFloat(row, '[data-stat="attendance_per_g"]')

    return [
        id, team_id, season, age, wins, losses, wins_pyth, losses_pyth, mov,
        sos, srs, off_rtg, def_rtg, net_rtg, pace, fta_per_fga_pct,
        fg3a_per_fga_pct, ts_pct, efg_pct, tov_pct, orb_pct, ft_rate,
        opp_efg_pct, opp_tov_pct, drb_pct, opp_ft_rate, arena_name, attendance,
        attendance_per_g
    ]
Exemplo n.º 5
0
def getGameData(url, year):
    """Scrape one schedule page and record every game row found on it.

    Downloads ``url``, parses it, and for each row of the ``#schedule``
    table that has no ``class`` attribute appends:

    * to ``games``: ``[id, homeTeamId, homeTeamScore, awayTeamId,
      awayTeamScore, date, overtime, attendance, playoff, season, notes]``
    * to ``boxScoreLinks``: ``[id, boxScoreLink, awayTeamId, homeTeamId,
      season, playoff, date]``

    The module-level ``id`` counter is incremented once per recorded game.
    The module-level ``playoff`` flag is switched to ``True`` as soon as a
    row whose first child is a tag with the text ``'Playoffs'`` is seen (or
    when ``url`` is the April 1980 page) and is never reset here, so it
    carries over to later rows and later calls.

    Args:
        url: Address of the schedule page to download.
        year: Not used by this function; kept for caller compatibility.

    Returns:
        None. Returns early, recording nothing, when the page's ``<h1>``
        reads ``'Page Not Found (404 error)'``.
    """
    global playoff
    global id

    html_source = requests.get(url)
    soup = BeautifulSoup(html_source.content, 'html.parser')

    if soup.find('h1').get_text() == 'Page Not Found (404 error)':
        return

    # Only the first 7 characters of the header text are kept as the season
    # label (presumably something like "2019-20" -- confirm on a live page).
    season = soup.select_one(
        'ul.hoversmooth > li.index > a > u').get_text()[:7]
    table_rows = soup.select('#schedule > tbody > tr')

    for tr in table_rows:
        # An explicit emptiness check replaces the former bare ``except``,
        # which only ever guarded against a row without children.
        first_child = tr.contents[0] if tr.contents else None
        is_playoff_marker = (isinstance(first_child, Tag)
                             and first_child.get_text() == 'Playoffs')
        if is_playoff_marker or url == 'https://www.basketball-reference.com/leagues/NBA_1980_games-april.html':
            playoff = True

        # Rows carrying a class attribute are not game rows; skip them.
        if tr.get('class'):
            continue

        date = tr.select_one('[data-stat="date_game"]').get_text()
        awayTeamName = tr.select_one(
            '[data-stat="visitor_team_name"]').get_text()
        awayTeamId = int(searchTeamByName(awayTeamName)[0])
        awayTeamScore = tr.select_one(
            '[data-stat="visitor_pts"]').get_text()
        homeTeamName = tr.select_one(
            '[data-stat="home_team_name"]').get_text()
        # Looked up once; the same record also supplies the short name for
        # the fallback box-score URL below.
        homeTeam = searchTeamByName(homeTeamName)
        homeTeamId = int(homeTeam[0])
        homeTeamScore = tr.select_one('[data-stat="home_pts"]').get_text()
        overtime = tr.select_one('[data-stat="overtimes"]').get_text()
        attendance = tr.select_one('[data-stat="attendance"]').get_text()

        # Attendance stays an empty string when the cell is blank;
        # otherwise the thousands separators are stripped and it becomes int.
        if attendance != '':
            attendance = int(attendance.replace(',', ''))

        notes = tr.select_one('[data-stat="game_remarks"]').get_text()

        box_score_anchor = tr.select_one('[data-stat="box_score_text"] > a')
        box_score_href = None
        if box_score_anchor is not None:
            box_score_href = box_score_anchor.get("href")

        if box_score_href is not None:
            boxScoreLink = ('https://www.basketball-reference.com'
                            + box_score_href)
        else:
            # No link in the row: rebuild the URL as
            # /boxscores/<YYYYMMDD>0<home short name>.html from the date.
            date2 = datetime.strptime(date, '%a, %b %d, %Y')
            homeShortName = homeTeam[2]
            boxScoreLink = ('https://www.basketball-reference.com/boxscores/'
                            + date2.strftime('%Y%m%d')
                            + '0' + homeShortName + '.html')

        storeValue = [
            id, homeTeamId, homeTeamScore, awayTeamId, awayTeamScore, date,
            overtime, attendance, playoff, season, notes
        ]

        boxScoreLinks.append([
            id, boxScoreLink, awayTeamId, homeTeamId, season, playoff, date
        ])

        id += 1

        games.append(storeValue)
Exemplo n.º 6
0
def _elementText(parent, selector, default=''):
    """Return the text of the first element matching ``selector``, else ``default``."""
    element = parent.select_one(selector)
    if element is None:
        return default
    return element.get_text()


def getPlayerData(url):
    """Scrape one player page and append a row for it to ``players.csv``.

    Args:
        url: Indexable record describing the player. ``url[1]`` is the page
            address that is downloaded; ``url[0]``, ``url[2]`` and ``url[3]``
            are written to the CSV unchanged (``url[3]`` in the ``retired``
            column). Their exact meaning is defined by the caller.

    Returns:
        None. The only effect is one row appended to ``players.csv`` with
        the columns: url[0], name, fullName, url[2], height, weight,
        birth_date, birth_place, retired, playerUrl, current_team_id, teams,
        injury. Optional fields missing from the page are written as empty
        strings.
    """
    html = requests.get(url[1])
    soup = BeautifulSoup(html.content, 'html.parser')
    team_cells = soup.select('#per_game > tfoot > tr [data-stat=team_id]')
    div = soup.select_one('#meta > div[itemtype="https://schema.org/Person"]')

    teams = []
    current_team_id = ''
    birth_date = ''
    # Keep only the last three path segments of the page address.
    playerUrl = '/'.join(url[1].split('/')[-3:])
    retired = url[3]
    name = div.select_one('h1').get_text().strip()

    for team_cell in team_cells:
        # Unknown short names yield None from the lookup and are skipped.
        team = searchTeamByShortName(team_cell.get_text())
        if team is not None:
            teams.append(int(team))

    # When the second paragraph is the pronunciation line, the full name is
    # in the third paragraph instead.
    second_paragraph_label = div.select_one(
        'p:nth-child(2) > strong').get_text().strip()
    if second_paragraph_label == 'Pronunciation':
        fullName = div.select_one('p:nth-child(3) > strong').get_text().strip()
    else:
        fullName = second_paragraph_label

    height = _elementText(div, 'span[itemprop="height"]')
    # Drop the last two characters of the weight text (presumably the unit
    # suffix -- confirm on a live page).
    weight = _elementText(div, 'span[itemprop="weight"]')[:-2]

    # The birth date needs both links; if either is missing it stays empty.
    birth_first_link = div.select_one('#necro-birth > a:first-child')
    birth_second_link = div.select_one('#necro-birth > a:nth-child(2)')
    if birth_first_link is not None and birth_second_link is not None:
        birth_date = (birth_first_link.get_text().strip() + " "
                      + birth_second_link.get_text().strip())

    # NOTE(review): the season is hard-coded to 2020 in this selector.
    current_team_link = div.select_one('[href*="/2020.html"]')
    if current_team_link is not None:
        try:
            current_team_id = searchTeamByName(
                current_team_link.get_text())[0]
        except Exception:
            # searchTeamByName is defined elsewhere and its failure modes
            # are not visible here; keep the lookup best-effort, but no
            # longer swallow KeyboardInterrupt/SystemExit.
            current_team_id = ''
        # NOTE(review): ids appended above are ints, this one is stored as
        # returned by the lookup -- confirm whether it should be int() too.
        if not teams and current_team_id:
            teams.append(current_team_id)

    # The first three characters of the birthplace text are discarded
    # (presumably a leading "in " -- confirm on a live page).
    birth_place = _elementText(
        div, 'span[itemprop="birthPlace"]').strip()[3:]

    injury = soup.select_one('#injury') is not None

    with open('players.csv', 'a', newline='') as file:
        writer = csv.writer(file)
        writer.writerow([
            url[0], name, fullName, url[2], height, weight, birth_date,
            birth_place, retired, playerUrl, current_team_id, teams, injury
        ])