def _run_query(conn, sql, params=None):
    """Run a SELECT on a short-lived cursor and return all of its rows."""
    cursor = conn.cursor()
    try:
        if params is None:
            cursor.execute(sql)
        else:
            cursor.execute(sql, params)
        # Rows have to be fetched while the cursor is still open.
        return cursor.fetchall()
    finally:
        cursor.close()


def _run_statement(conn, sql, params, many=False):
    """Run an INSERT (or a batch of them when *many* is true) and close the cursor."""
    cursor = conn.cursor()
    try:
        if many:
            cursor.executemany(sql, params)
        else:
            cursor.execute(sql, params)
    finally:
        cursor.close()


def _read_team_abbrs(soup):
    """Return the ``(away, home)`` team abbreviations, or ``None`` if the page lacks them."""
    if soup.find(id='nbaGIGmeLve') is None:
        return None
    quarter_box = soup.find(id='nbaGITmeQtr')
    table = quarter_box.find('table') if quarter_box is not None else None
    rows = table.select('tr') if table is not None else []
    if len(rows) != 2:
        return None
    # First row is the away team, second row the home team.
    return tuple(unicode(row.td.string).strip() for row in rows)


def _read_quarter_points(soup):
    """Return ``(away_points, home_points)`` cell texts, or ``None`` if the table is missing."""
    score_box = soup.find(id='nbaGIQtrScrs')
    table = score_box.find('table') if score_box is not None else None
    rows = table.select('tr') if table is not None else []
    if len(rows) != 3:
        return None
    # Only the first and the last of the three rows are read; the middle
    # row is ignored.
    away_points = [td.string for td in rows[0].select('td')]
    home_points = [td.string for td in rows[2].select('td')]
    return away_points, home_points


def _read_player_rows(team_node):
    """Return get_tr_score() results for the odd/even rows of one team table, minus 'Total'."""
    parsed_rows = []
    for row in team_node.select('tr'):
        if 'class' not in row.attrs:
            continue
        if row['class'][0] not in ('odd', 'even'):
            continue
        parsed = get_tr_score(row)
        if parsed != 'Total':
            parsed_rows.append(parsed)
    return parsed_rows


def _ensure_player(conn, display_name):
    """Return the pid stored for *display_name*, inserting a player row if none exists."""
    found = _run_query(
        conn,
        'select pid from player where display_name_en=%s order by birthday',
        (display_name,))
    if len(found) == 0:
        _run_statement(
            conn,
            'insert into player (pid,display_name_en) VALUES (NULL,%s)',
            (display_name,))
        found = _run_query(conn, 'SELECT LAST_INSERT_ID()')
    # When several players share the name, the first row in birthday order wins.
    return found[0][0]


def get_box_score(html, url_path, match_date):
    """Parse one game's box-score page and store its scores in the database.

    The two team abbreviations, the line score and the per-player rows are
    read from *html*.  If a matchinfo row for the same date and teams already
    exists nothing is written.  Otherwise one matchinfo row and one
    matchscore row per period column are inserted, and every away-team
    player is ensured to exist in the player table.

    A missing page section or an unknown team abbreviation is reported
    through log() and stops the import before anything is written.

    Args:
        html: Markup of the box-score page.
        url_path: Used only as the prefix of log messages.
        match_date: Compared with and stored as matchinfo.matchtime; also
            passed to get_season().

    Returns:
        None.
    """
    # NOTE(review): this function neither commits nor closes the connection;
    # confirm that get_connector() autocommits or that the connection's
    # lifetime is managed elsewhere.
    conn = get_connector()
    soup = BeautifulSoup(html)

    teams = _read_team_abbrs(soup)
    if teams is None:
        log(url_path + 'CANNOT FOUND TEAM INFO')
        return
    away_team, home_team = teams

    # Skip games that were already imported.
    existing = _run_query(
        conn,
        'select * from matchinfo,team a,team b where matchtime=%s and a.abbr=%s and b.abbr=%s and matchinfo.teamf=a.tid and matchinfo.teaml=b.tid',
        (match_date, away_team, home_team))
    print(existing)
    if len(existing) != 0:
        return

    points = _read_quarter_points(soup)
    if points is None or not points[0] or not points[1]:
        log(url_path + 'CANNOT FOUND TEAM INFO')
        return
    away_team_point, home_team_point = points

    boxscore = soup.find(id='nbaGIboxscore')
    if boxscore is None:
        log(url_path + 'NOT FOUND BOXSCORE')
        return
    team_nodes = boxscore.find_all(id='nbaGITeamStats')
    if len(team_nodes) != 2:
        log(url_path + 'CANNOT FOUND TWO TEAM')
        return

    print(away_team)
    print(away_team_point)
    away_player_point = _read_player_rows(team_nodes[0])
    print(home_team)
    print(home_team_point)
    # Parsed for parity with the away side; not stored yet (see TODO below).
    home_player_point = _read_player_rows(team_nodes[1])

    team_sql = 'select tid from team where abbr=%s'
    away_ids = _run_query(conn, team_sql, (away_team,))
    home_ids = _run_query(conn, team_sql, (home_team,))
    if not away_ids or not home_ids:
        # An abbreviation on the page has no row in the team table.
        log(url_path + 'CANNOT FOUND TEAM INFO')
        return
    teamf = away_ids[0][0]
    teaml = home_ids[0][0]

    season = get_season(match_date)
    print('%s %s %s' % (season, teamf, teaml))
    # The last line-score column holds each team's final total.
    _run_statement(
        conn,
        'insert into matchinfo VALUES (NULL ,%s, %s, %s, %s,%s,%s,%s)',
        (season, season + 1, match_date, teamf, teaml,
         str(away_team_point[-1]).strip(), str(home_team_point[-1]).strip()))
    mid = int(_run_query(conn, 'SELECT LAST_INSERT_ID()')[0][0])

    # Every column except the away side's last one becomes a matchscore row,
    # numbered from 1.
    period_rows = [
        (mid, period, str(away).strip(), str(home).strip())
        for period, (away, home)
        in enumerate(zip(away_team_point[:-1], home_team_point), 1)
    ]
    _run_statement(
        conn, 'insert into matchscore VALUES (%s,%s,%s,%s)', period_rows,
        many=True)

    for player_point in away_player_point:
        print(player_point)
        player = get_player_score(player_point)
        # The first field of the parsed row is the player's display name.
        _ensure_player(conn, player[0])
    # TODO: per-player stat rows (match id, team id, player id, stats) are
    # not written to playerscore yet, and home-team players are neither
    # registered nor stored; the insert for that table was never finished.
def print_url(match_date, format_match_date):
    """Fetch the game-line page for one day and process each of its recap links.

    Args:
        match_date: Date string placed in the game-line URL and in the
            ``/games/<match_date>/`` link pattern.
        format_match_date: Passed through unchanged to get_game_info().

    Returns:
        None.  Nothing is processed when get_response() returns a falsy value.
    """
    url_path = 'http://www.nba.com/gameline/' + match_date + '/'
    print(url_path)
    html = get_response(url_path)
    if not html:
        return
    soup = BeautifulSoup(html)
    # Escape the date so it is always matched literally inside the pattern.
    game_link = re.compile("^/games/" + re.escape(match_date) + "/")
    # A set removes links that appear on the page more than once.
    recap_links = set(
        tag['href']
        for tag in soup.find_all(href=game_link)
        if tag.string == 'recap'
    )
    for href in recap_links:
        get_game_info(href, format_match_date)


# Resume scraping from the most recent day stored in matchinfo up to, but not
# including, today.  The most recent stored day itself is visited again.
sql = 'select max(matchtime) from matchinfo'
conn = get_connector()
cursor = conn.cursor()
try:
    cursor.execute(sql)
    values = cursor.fetchall()
finally:
    cursor.close()
date = values[0][0]
step = datetime.timedelta(days=1)
today = datetime.date.today()
if date is None:
    # max() over an empty table yields NULL, so there is no day to resume from.
    print('matchinfo is empty; no start date to resume from')
else:
    while date < today:
        print_url(date.strftime("%Y%m%d"), date.isoformat())
        date = date + step
now_period = int(now_period) if now_period > period: period = now_period continue if status == 3: sql = 'update next_match set status = 3 where gameid = %s' cursor = conn.cursor() cursor.execute(sql,(gameid,)) cursor.close() conn.commit() conn.close() return conn.close() while True: conn = get_connector() sql = 'select * from next_match where status!=3' cursor = conn.cursor() cursor.execute(sql) values = cursor.fetchall() cursor.close() conn.close() for value in values: game_date_time = value[1] now_date_time = datetime.datetime.now() if now_date_time > game_date_time: if value[0] in games: pass else: p = Process(target=game_live, args=(get_connector(),value[0])) p.start()