def insert_odds():
    """Import every odds CSV found directly under ``csv/odds``.

    Steps, in order:

    1. If ``bet_type`` has no rows, call ``sif.insert_bet_type`` with
       ``db_src/bet_type.csv``.
    2. Call ``db_func.truncate_imports`` and feed each regular file in
       ``csv/odds`` to ``sif.insert_to_imports``.
    3. Run ``ANALYZE imports`` followed by ``sif.imports_to_bets``.
    4. Run ``ANALYZE odds`` followed by ``sif.imports_to_bets_total``.

    The connection is switched to autocommit and is closed even when one of
    the steps raises.
    """
    odds_dir = 'csv/odds'
    conn = db_func.get_conn()
    try:
        conn.autocommit = True
        cur = conn.cursor()

        # Seed the bet types only when the table is still empty.
        cur.execute('''SELECT * FROM bet_type''')
        if not cur.fetchall():
            sif.insert_bet_type('db_src/bet_type.csv', cur)
            conn.commit()

        db_func.truncate_imports(cur)
        csv_paths = [
            f'{odds_dir}/{name}'
            for name in listdir(odds_dir)
            if isfile(join(odds_dir, name))
        ]
        # `csv_path` rather than `csv` so the stdlib module name is not shadowed.
        for csv_path in tqdm(csv_paths, colour='red', position=1):
            sif.insert_to_imports(csv_path)
        conn.commit()

        # ANALYZE runs right after each bulk load, before the next step
        # queries the freshly filled table.
        db_func.exec_query(conn, '''ANALYZE imports''')
        sif.imports_to_bets(cur)
        conn.commit()

        db_func.exec_query(conn, '''ANALYZE odds''')
        sif.imports_to_bets_total(cur)
        conn.commit()
    finally:
        conn.close()
def main():
    """Ad-hoc driver that loads one boxscore CSV through the import pipeline.

    Configures logging to ``logs/<today>.log`` and the console, then calls
    ``db_func.truncate_imports``, ``sif.insert_to_imports`` for
    ``csv/boxscores/200710300GSW.csv`` and
    ``sif.imports_to_player_performance``, committing after each step.

    On any exception the traceback is logged and the process exits with
    status 1. The connection is closed in every case.
    """
    # NOTE (kept from the original author): the page that contains the start
    # of the playoff table only needs the rows after it modified, while all
    # pages after it only contain playoff games.
    os.makedirs("logs", exist_ok=True)
    logging.basicConfig(
        level=logging.DEBUG,
        format='[%(asctime)s] {%(pathname)s:%(lineno)d} %(levelname)s - %(message)s',
        datefmt='%Y-%m-%d %H:%M:%S',
        handlers=[
            logging.FileHandler('logs/' + date.today().strftime("%Y-%m-%d") + '.log'),
            logging.StreamHandler(),
        ])

    conn = None
    try:
        conn = db_func.get_conn()
        cur = conn.cursor()

        db_func.truncate_imports(cur)
        conn.commit()
        sif.insert_to_imports('csv/boxscores/200710300GSW.csv')
        conn.commit()
        sif.imports_to_player_performance(cur)
        conn.commit()
    except Exception:
        # logging.exception already appends the active traceback; passing it
        # the return value of traceback.print_exception() logged "None".
        logging.exception("main() failed")
        # Non-zero status so callers can tell the run did not succeed.
        sys.exit(1)
    finally:
        if conn is not None:
            conn.close()
def insert_to_imports(csv):
    """Bulk-copy a CSV file into the ``imports`` table.

    The first line of the file is read as a comma-separated list of column
    names. The rest of the file is handed to ``cursor.copy_from`` with
    ``sep=','`` and those columns. A dedicated connection is opened for the
    copy, committed on success and closed in every case.

    Args:
        csv: path of the CSV file to load.

    Raises:
        ValueError: the file is empty or its first line is blank.
        Exception: errors from opening the file or from the database driver
            propagate unchanged.
    """
    with open(csv, 'r') as f:
        # Validate the header before touching the database so that a bad
        # file never opens (and then leaks) a connection.
        header_line = f.readline().strip()
        if not header_line:
            raise ValueError(f'{csv}: missing CSV header row')
        headers = header_line.split(',')

        conn = db_func.get_conn()
        try:
            cur = conn.cursor()
            # `f` is now positioned just past the header, so only data rows
            # are copied.
            cur.copy_from(f, 'imports', columns=headers, sep=',')
            conn.commit()
        finally:
            conn.close()
def save_odds():
    """Call ``get_odds`` for every season from 2007 onwards.

    Reads ``(season, start_date, end_date)`` rows from the ``season`` table,
    skips seasons below 2007, and for each remaining season calls
    ``get_odds`` once per bet type: ``'ml'``, ``'ps'`` and ``'total'``.
    """
    conn = db_func.get_conn()
    try:
        cur = conn.cursor()
        cur.execute(
            '''
            SELECT season, start_date, end_date
            FROM season''')
        season_rows = cur.fetchall()
    finally:
        # The rows are fully fetched, so the connection is released before
        # the loop below instead of being held (or leaked on error).
        conn.close()

    for season, start_date, end_date in tqdm(season_rows, colour='cyan', position=1):
        if season < 2007:
            continue
        for bet_type in ('ml', 'ps', 'total'):
            get_odds(season, start_date, end_date, bet_type)
def get_team_abbr():
    """Return a dict mapping ``team_name`` to ``team_abbr``.

    Built from the distinct ``(team_name, team_abbr)`` pairs in the
    ``team_name`` table. If a name occurs with more than one abbreviation,
    the pair returned last by the query wins.
    """
    conn = db_func.get_conn()
    try:
        cur = conn.cursor()
        cur.execute(
            '''SELECT DISTINCT t.team_name, t.team_abbr
               FROM team_name as t;''')
        # Each row is a (name, abbr) pair, which dict() consumes directly.
        return dict(cur.fetchall())
    finally:
        conn.close()
def get_teams(season=0):
    """Return a list of ``team_abbr`` values from the ``team_name`` table.

    Args:
        season: when equal to ``0`` (the default) every row is returned;
            otherwise only rows satisfying
            ``season >= active AND season < inactive``.

    Returns:
        A list with the first column of each matching row.
    """
    # Do not use the % operator to format the query directly (prone to SQL
    # injection); the season is passed as a bound parameter instead.
    conn = db_func.get_conn()
    try:
        cur = conn.cursor()
        if season == 0:
            cur.execute('''SELECT team_abbr FROM team_name''')
        else:
            cur.execute(
                '''SELECT team_abbr FROM team_name
                   WHERE %s >= active AND %s < inactive;''',
                (season, season))
        return [row[0] for row in cur.fetchall()]
    finally:
        conn.close()
def get_matches(start_date=datetime.fromisoformat('1900-01-01'),
                end_date=datetime.fromisoformat('2200-12-31')):
    """Return all matches whose date lies in ``[start_date, end_date]``.

    Both bounds are inclusive and are passed to the query as bound
    parameters compared against ``match.date``.

    Args:
        start_date: lower bound; defaults to ``datetime`` 1900-01-01.
        end_date: upper bound; defaults to ``datetime`` 2200-12-31.
            NOTE(review): the previous docstring described these as
            ``200604010``-style ids, but the defaults are ``datetime``
            objects -- confirm what callers actually pass.

    Returns:
        A list of ``(date, bbref_team)`` rows.
    """
    conn = db_func.get_conn()
    try:
        cur = conn.cursor()
        cur.execute(
            '''SELECT m.date, m.bbref_team
               FROM match as m
               WHERE %s <= m.date AND %s >= m.date;''',
            (start_date, end_date))
        return cur.fetchall()
    finally:
        conn.close()
def fill_missing_odds():
    """Import the SBRO odds CSVs and run ``sif.fill_missing_odds``.

    Steps, in order:

    1. Call ``db_func.truncate_imports``.
    2. Ensure ``csv/sbro_odds/modified`` exists and pass every regular file
       directly under ``csv/sbro_odds`` to ``modify_sbro_odds``
       (presumably it writes its output into ``modified`` -- confirm).
    3. Pass every regular file in ``csv/sbro_odds/modified`` to
       ``sif.insert_to_imports``.
    4. Call ``sif.fill_missing_odds``.

    The connection is switched to autocommit and is closed even when one of
    the steps raises.
    """
    sbro_dir = 'csv/sbro_odds'
    modified_dir = 'csv/sbro_odds/modified'

    conn = db_func.get_conn()
    try:
        conn.autocommit = True
        cur = conn.cursor()
        db_func.truncate_imports(cur)

        os.makedirs(modified_dir, exist_ok=True)

        raw_paths = [
            f'{sbro_dir}/{name}'
            for name in listdir(sbro_dir)
            if isfile(join(sbro_dir, name))
        ]
        for raw_path in tqdm(raw_paths, colour='red', position=1):
            modify_sbro_odds(raw_path)

        # Listed only after the loop above so files it produced are included.
        modified_paths = [
            f'{modified_dir}/{name}'
            for name in listdir(modified_dir)
            if isfile(join(modified_dir, name))
        ]
        for modified_path in tqdm(modified_paths, colour='red', position=1):
            sif.insert_to_imports(modified_path)
        conn.commit()

        sif.fill_missing_odds(cur)
        conn.commit()
    finally:
        conn.close()
def init_child(lock_):
    """Populate the module-level ``conn`` and ``lock`` globals.

    ``conn`` receives a fresh connection from ``db_func.get_conn()`` and
    ``lock`` receives the object passed in. The name suggests this is used
    as a worker-process initializer -- confirm at the call site.

    Args:
        lock_: object stored in the global ``lock``.
    """
    global conn, lock
    # The right-hand side is evaluated in full first, so a failing
    # get_conn() leaves both globals untouched.
    conn, lock = db_func.get_conn(), lock_
def run_scraper():
    """Run the full scrape-and-import pipeline.

    Configures logging to ``logs/<today>.log`` and the console, then, on an
    autocommit connection:

    1. Loads seasons via ``get_all_seasons``; if none exist, seeds them with
       ``sif.insert_seasons`` from ``db_src/seasons.csv`` and reloads.
    2. If ``get_teams`` returns nothing for the first season, imports
       ``db_src/NBA_Teams.csv`` through ``sif.imports_to_team`` and
       ``sif.imports_to_team_name``.
    3. Runs ``process_players``, ``process_matches`` and
       ``process_boxscores``, calling ``db_func.truncate_imports`` before
       each, then ``sif.player_performance_to_injury``.

    On any exception the traceback is logged, a rollback is attempted and
    the process exits with status 1. The connection is closed in every case.
    """
    os.makedirs("logs", exist_ok=True)
    logging.basicConfig(
        level=logging.DEBUG,
        format='[%(asctime)s] {%(pathname)s:%(lineno)d} %(levelname)s - %(message)s',
        datefmt='%Y-%m-%d %H:%M:%S',
        handlers=[
            logging.FileHandler('logs/' + date.today().strftime("%Y-%m-%d") + '.log'),
            logging.StreamHandler(),
        ])

    # Bound before the try so the except/finally clauses can test it even
    # when get_conn() itself fails.
    conn = None
    try:
        start = timer()
        conn = db_func.get_conn()
        cur = conn.cursor()
        conn.autocommit = True

        team_list_path = 'db_src/NBA_Teams.csv'
        seasons_path = 'db_src/seasons.csv'

        seasons = get_all_seasons(cur)
        if not seasons:
            db_func.truncate_imports(cur)
            sif.insert_seasons(seasons_path, cur)
            seasons = get_all_seasons(cur)
        if not seasons:
            # Fail with a clear message instead of an IndexError below.
            raise RuntimeError(f'no seasons available after loading {seasons_path}')
        # Downstream helpers receive each season as a 1-tuple of str.
        seasons = [(str(row[0]), ) for row in seasons]

        # Queried once and reused for both the debug print and the check.
        teams = get_teams(seasons[0][0])
        if DEBUG:
            print(seasons)
            print(len(teams))
        if not teams:
            db_func.truncate_imports(cur)
            sif.insert_to_imports(team_list_path)
            sif.imports_to_team(cur)
            sif.imports_to_team_name(cur)
            conn.commit()

        db_func.truncate_imports(cur)
        process_players(cur, seasons)
        conn.commit()

        db_func.truncate_imports(cur)
        process_matches(cur, seasons)
        conn.commit()

        db_func.truncate_imports(cur)
        process_boxscores(cur)
        sif.player_performance_to_injury(cur)
        conn.commit()

        logging.info("All players inserted")
        end = timer()
        print(f'elapsed time: {end - start}')
    except Exception:
        # logging.exception already appends the active traceback; passing it
        # the return value of traceback.print_exception() logged "None".
        logging.exception("run_scraper() failed")
        if conn is not None:
            conn.rollback()
        # Non-zero status so callers can tell the run did not succeed.
        sys.exit(1)
    finally:
        if conn is not None:
            conn.close()
            print("PostgreSQL connection is closed")