def scrape_nba_results():
    '''Scrape recent NBA results from betexplorer.com.

    Returns:
        A DataFrame with one row per scraped game and the columns
        ``Date`` (``dd/mm/YYYY`` string, shifted back one day),
        ``HomeTeam``, ``AwayTeam``, ``HomePoints``, ``AwayPoints`` (score
        strings as scraped), ``HomeWin`` (bool), ``HomeId`` and ``AwayId``
        (``TeamId`` values from ``lookup_teams()``; NaN when the franchise
        name has no match there).

    Raises:
        ValueError: if a date cell does not match ``dd.mm.YYYY``.
    '''
    url = 'http://www.betexplorer.com/basketball/usa/nba/results/'
    raw = pd.read_html(get(url).text)[0]

    # Column 0 is parsed as "<home> - <away>", column 1 as "<home>:<away>"
    # and column 4 as the match date.
    fixture = raw[0].str.split('-')
    score = raw[1].str.split(':')

    # NOTE(review): presumably this rename matches the 'Franchise' spelling
    # returned by lookup_teams() - confirm.
    renames = {'Portland Trail Blazers': 'Portland Trailblazers'}
    home_team = fixture.str[0].str.strip().replace(renames)
    away_team = fixture.str[1].str.strip().replace(renames)

    home_points = score.str[0].str.strip()
    # Anything from an 'ET' marker onwards is dropped from the away score.
    away_points = score.str[1].str.strip().str.split('ET').str[0].str.strip()

    # The dates on this website are GMT so are one day advanced.
    played = pd.to_datetime(raw[4], format='%d.%m.%Y') - pd.Timedelta(days=1)

    results = pd.DataFrame(
        {
            'Date': played.dt.strftime('%d/%m/%Y'),
            'HomeTeam': home_team,
            'AwayTeam': away_team,
            'HomePoints': home_points,
            'AwayPoints': away_points,
            # Compare numerically: as strings '99' > '100' would be True.
            # Unparseable scores become NaN and therefore compare False.
            'HomeWin': (pd.to_numeric(home_points, errors='coerce')
                        > pd.to_numeric(away_points, errors='coerce')),
        },
        columns=['Date', 'HomeTeam', 'AwayTeam',
                 'HomePoints', 'AwayPoints', 'HomeWin'],
    )

    # Look ids up with an index-aligned map rather than a merge: a merge
    # re-indexes its result, so a single unmatched franchise would shift
    # every following id onto the wrong game.
    teams = lookup_teams()
    team_ids = (teams.drop_duplicates('Franchise')
                .set_index('Franchise')['TeamId'])
    results['HomeId'] = results['HomeTeam'].map(team_ids)
    results['AwayId'] = results['AwayTeam'].map(team_ids)
    return results
def scrape_model_probs():
    '''Scrape CARM-Elo probabilities for next round of matches.

    Returns:
        A DataFrame with the columns ``Date`` (today's date as a
        ``dd/mm/YYYY`` string), ``HomeId``, ``AwayId``, ``HomeTeam``,
        ``AwayTeam``, ``HomeModelProb`` and ``AwayModelProb``. The
        probabilities are floats in [0, 1]; ids and team names come from
        ``lookup_teams()`` and are NaN when the scraped abbreviation has no
        ``Abbreviation538`` match.
    '''
    url = 'http://projects.fivethirtyeight.com/2016-nba-picks/'
    # Second table on the page; keep the 2nd to 5th columns by position.
    # `.copy()` so the column assignments below do not write into a slice.
    df = pd.read_html(get(url).text)[1].iloc[:, 1:5].copy()
    # Rows whose first kept cell is the literal 'X' are not match rows.
    df = df[df.iloc[:, 0] != 'X'].copy()
    df.columns = ['AwayTeam', 'AwayModelProb', 'HomeModelProb', 'HomeTeam']

    def parse_probability(column):
        # "73%" -> 0.73
        return column.str.split('%').str[0].astype(float) / 100

    df['AwayModelProb'] = parse_probability(df['AwayModelProb'])
    df['HomeModelProb'] = parse_probability(df['HomeModelProb'])

    # Translate abbreviations with an index-aligned map. A merge returns a
    # fresh 0..n-1 index, which no longer lines up with `df` once the 'X'
    # rows have been filtered out.
    teams = (lookup_teams()
             .drop_duplicates('Abbreviation538')
             .set_index('Abbreviation538'))
    away_abbr = df['AwayTeam']
    home_abbr = df['HomeTeam']

    df['Date'] = datetime.today().strftime('%d/%m/%Y')
    df['AwayId'] = away_abbr.map(teams['TeamId'])
    df['AwayTeam'] = away_abbr.map(teams['Franchise'])
    df['HomeId'] = home_abbr.map(teams['TeamId'])
    df['HomeTeam'] = home_abbr.map(teams['Franchise'])

    return df[['Date', 'HomeId', 'AwayId', 'HomeTeam', 'AwayTeam',
               'HomeModelProb', 'AwayModelProb']]