def runTest(self):
    """Build a postgame hitter entry while a test-database session is open.

    The entry is only constructed and given a rotowire id; it is never added
    to the session, so no add/commit happens before the session is closed.
    """
    session = MlbDatabase(DB_PATH).open_session()
    hitter_result = PostgameHitterGameEntry()
    hitter_result.rotowire_id = '10972'
    session.close()
def add_pitcher_entries():
    """Add two starting-pitcher entries to the test database and commit them."""
    session = MlbDatabase(DB_PATH).open_session()
    # Positional constructor arguments for each PitcherEntry, in insertion order.
    starters = (
        ('Andrew', 'Cashner', '10468'),
        ('Jeff', 'Samardzija', '9374'),
    )
    for first_name, last_name, player_id in starters:
        session.add(PitcherEntry(first_name, last_name, player_id))
    session.commit()
    session.close()
def mine_pregame_stats():
    """Mine the pregame hitter and pitcher stats for the current game lineups.

    Opens its own database session, fetches the game lineups, updates the
    player ids for those games, then runs the pregame hitting and pitching
    miners. The function takes no parameters and returns nothing.

    The session is closed even when one of the mining steps raises, so a
    failed run does not leak the connection.
    """
    database_session = MlbDatabase().open_session()
    try:
        games = get_game_lineups(database_session)
        update_ids(games, database_session)
        get_pregame_hitting_stats(games)
        get_pregame_pitching_stats(games)
    finally:
        database_session.close()
def runTest(self):
    """Populate a postgame pitcher entry while a test-database session is open.

    The entry is filled in with today's date/time and the home team constant
    but is never added to the session before the session is closed.
    """
    session = MlbDatabase(DB_PATH).open_session()
    pitcher_result = PostgamePitcherGameEntry()
    pitcher_result.rotowire_id = '10468'
    pitcher_result.game_date = date.today()
    pitcher_result.game_time = date.today().ctime()
    pitcher_result.home_team = CommitNewPregamePitcherTest.HOME_TEAM
    session.close()
def setUp(self):
    """Seed the test database with a pitcher entry and a game entry for today."""
    add_pitcher_entry()
    session = MlbDatabase(DB_PATH).open_session()
    game_day = date.today()
    game_clock = date.today().ctime()
    todays_game = GameEntry(game_day, game_clock,
                            CommitNewPregamePitcherTest.HOME_TEAM, 'MIA')
    session.add(todays_game)
    session.commit()
    session.close()
def setUp(self):
    """Seed the test database with a hitter, a pitcher and today's SD/BOS game."""
    add_hitter_entry()
    add_pitcher_entry()
    session = MlbDatabase(DB_PATH).open_session()
    game_day = date.today()
    game_clock = date.today().ctime()
    todays_game = GameEntry(game_day, game_clock, 'SD', 'BOS')
    session.add(todays_game)
    session.commit()
    session.close()
def get_opponent_vector(self):
    """Return the averaged input vector of the opposing hitters in this game.

    Queries the pregame hitter entries that share this entry's game date,
    game time and home team but have the opposite ``is_home_team`` flag,
    sums their raw input vectors and hands the sum to
    ``PregameHitterGameEntry.avg_input_vector``.

    :return: whatever ``PregameHitterGameEntry.avg_input_vector`` returns for
        the summed vector (an all-zero sum when no hitters match)
    """
    database_session = MlbDatabase().open_session()
    try:
        # Get the hitters he is facing as well
        opposing_hitters = database_session.query(PregameHitterGameEntry).filter(
            PregameHitterGameEntry.game_date == self.game_date,
            PregameHitterGameEntry.game_time == self.game_time,
            PregameHitterGameEntry.home_team == self.home_team,
            PregameHitterGameEntry.is_home_team != self.is_home_team)

        # 31 is the accumulator length used for the raw hitter vector
        # NOTE(review): presumably the length of to_input_vector_raw() -- confirm
        hitter_array = np.zeros(31)
        for hitter_entry in opposing_hitters:
            hitter_array += hitter_entry.to_input_vector_raw()
    finally:
        # Release the session even if the query or a vector conversion fails
        database_session.close()

    return PregameHitterGameEntry.avg_input_vector(hitter_array)
def add_hitter_entry():
    """Add a single hitter entry (Wil Myers) to the test database and commit it."""
    session = MlbDatabase(DB_PATH).open_session()
    myers = HitterEntry('Wil', 'Myers', '10972')
    # Attributes assigned after construction, in the original order.
    extra_fields = (
        ('baseball_reference_id', 'myerswi01'),
        ('team', 'SD'),
        ('batting_hand', 'R'),
    )
    for field_name, field_value in extra_fields:
        setattr(myers, field_name, field_value)
    session.add(myers)
    session.commit()
    session.close()
def runTest(self):
    """Commit a fully populated pitcher entry (Andrew Cashner) to the test database."""
    session = MlbDatabase(DB_PATH).open_session()
    cashner = PitcherEntry('Andrew', 'Cashner', '10468')
    # Attributes assigned after construction, in the original order.
    extra_fields = (
        ('baseball_reference_id', 'cashnan01'),
        ('home_team', 'MIA'),
        ('pitching_hand', 'R'),
    )
    for field_name, field_value in extra_fields:
        setattr(cashner, field_name, field_value)
    session.add(cashner)
    session.commit()
    session.close()
def runTest(self):
    """Commit a pregame hitter entry for today's game to the test database."""
    session = MlbDatabase(DB_PATH).open_session()
    pregame_hitter = PregameHitterGameEntry()
    pregame_hitter.rotowire_id = '10972'
    pregame_hitter.game_date = date.today()
    pregame_hitter.game_time = date.today().ctime()
    pregame_hitter.home_team = CommitNewPregameHitterTest.HOME_TEAM
    session.add(pregame_hitter)
    session.commit()
    session.close()
def runTest(self):
    """Commit a complete lineup entry dated today to the test database."""
    session = MlbDatabase(DB_PATH).open_session()
    todays_lineup = LineupEntry()
    # Lineup slot -> player id, assigned in the original order.
    slot_assignments = (
        ('starting_pitcher_1', '10468'),
        ('starting_pitcher_2', '9374'),
        ('catcher', '8047'),
        ('first_baseman', '8620'),
        ('second_baseman', '6508'),
        ('third_baseman', '9862'),
        ('shortstop', '11437'),
        ('outfielder_1', '7140'),
        ('outfielder_2', '13190'),
        ('outfielder_3', '7618'),
    )
    for slot_name, player_id in slot_assignments:
        setattr(todays_lineup, slot_name, player_id)
    todays_lineup.game_date = date.today()
    todays_lineup.game_time = date.today().ctime()
    session.add(todays_lineup)
    session.commit()
    session.close()
import os
from sql.mlb_database import MlbDatabase
from mine.rotowire import mine_pregame_stats
from mine.draft_kings import Draftkings
from datetime import date, timedelta, datetime
from email_service import send_email
import cProfile

# Daily predictor script: mine pregame stats, refresh Draftkings salaries,
# predict today's points and e-mail the optimal lineup (or the failure).

# The script runs from the released checkout directory.
os.chdir("/home/cameron/workspaces/MlbDatabase/mlb_scrape/Released/mlbscrape_python")

databaseSession = MlbDatabase().open_session()

try:
    mine_pregame_stats()
    Draftkings.save_daily_csv()
    csv_dict = Draftkings.get_csv_dict()
    Draftkings.update_salaries(databaseSession, csv_dict)
    Draftkings.predict_daily_points(databaseSession, date.today())
    optimal_lineup = Draftkings.get_optimal_lineup(databaseSession, date.today())
    print(optimal_lineup)
    send_email(str(optimal_lineup))
except Exception as e:
    # Top-level boundary: report the failure by e-mail instead of crashing.
    print(e)
    send_email("The predictor generated an exception: {0}".format(e))
finally:
    # Close the session even if the failure e-mail itself cannot be sent.
    databaseSession.close()
def __init__(self):
    """Open a database session and start with no fitted decision tree."""
    session = MlbDatabase().open_session()
    self._database_session = session
    self._decision_tree = None
class PitcherRegressionForestTrainer(RegressionForest):
    """Random-forest regressor for pitcher Draftkings points.

    Training samples pair a pregame pitcher entry (features) with the
    matching postgame pitcher entry (target). The feature vector is the
    concatenation of the pitcher's input vector, the opposing hitters'
    vector, the park factors and the umpire vector.
    """

    SIZE_TRAINING_BATCH = 900

    # File the fitted forest is persisted to / restored from.
    _MODEL_FILE = 'pitcher_regression_forest.pkl'
    # Season used for the park factor lookup.
    # NOTE(review): hard-coded; presumably should follow the game date -- confirm.
    _PARK_FACTOR_YEAR = "2016"

    def __init__(self):
        """Open a database session and start with no fitted forest."""
        self._database_session = MlbDatabase().open_session()
        self._decision_tree = None

    def get_stochastic_batch(self, input_query, num_samples=None):
        """Build a randomly ordered batch of training samples.

        :param input_query: iterable of postgame pitcher entries
        :param num_samples: number of samples to draw; defaults to every
            postgame entry that has a matching pregame entry
        :return: ``(x, y)`` where ``x`` is a list of feature lists and ``y``
            is a list of one-element lists holding the actual Draftkings
            points
        :raises ValueError: from ``random.sample`` when ``num_samples``
            exceeds the number of matched entries
        """
        potential_samples = list()
        for postgame_entry in input_query:
            pregame_entry = self._database_session.query(PregamePitcherGameEntry).get(
                (postgame_entry.rotowire_id, postgame_entry.game_date,
                 postgame_entry.game_time))
            if pregame_entry is not None:
                potential_samples.append((pregame_entry, postgame_entry))
            else:
                print("Can't find %s %s %s %s" % (
                    postgame_entry.rotowire_id, postgame_entry.home_team,
                    postgame_entry.game_date, postgame_entry.game_time))

        if num_samples is None:
            num_samples = len(potential_samples)
        player_samples = random.sample(potential_samples, num_samples)

        x = list()
        y = list()
        for pregame_entry, postgame_entry in player_samples:
            # Samples without a game entry cannot supply umpire data; skip
            # them before doing any vector work.
            if pregame_entry.game_entry is None:
                print("NoneType game entry for %s %s %s %s" % (
                    pregame_entry.rotowire_id, pregame_entry.home_team,
                    pregame_entry.game_date, pregame_entry.game_time))
                continue

            input_vector = pregame_entry.to_input_vector()
            umpire_vector = self._get_umpire_vector(pregame_entry.game_entry)
            park_vector = self._get_park_vector(pregame_entry.home_team)

            final_pitcher_array = np.concatenate([
                input_vector,
                pregame_entry.get_opponent_vector(),
                park_vector,
                umpire_vector
            ])
            x.append(final_pitcher_array.tolist())
            y.append([postgame_entry.actual_draftkings_points])

        return x, y

    def _get_umpire_vector(self, game_entry):
        """Return the umpire's input vector, or the nominal data when unknown."""
        if game_entry.umpire is not None:
            ump_entry = self._database_session.query(UmpireCareerEntry).get(
                game_entry.umpire)
            if ump_entry is not None:
                return ump_entry.to_input_vector()
        return UmpireCareerEntry.get_nominal_data(self._database_session)

    def _get_park_vector(self, home_team):
        """Return the park factor vector for a team, or [100, 100] when missing."""
        park_factors = self._database_session.query(ParkEntry).get(
            (home_team, self._PARK_FACTOR_YEAR))
        if park_factors is None:
            print("Pitcher regression forest: Could not find %s from %s" % (
                home_team, self._PARK_FACTOR_YEAR))
            return np.array([100, 100])
        return park_factors.to_input_vector()

    def train_network(self):
        """Fit the forest on the postgame pitcher entries and report the error.

        Restores a previously saved model when one can be loaded, otherwise
        creates a 1000-estimator ``RandomForestRegressor``. The postgame
        entries are split with ``get_train_eval_data(db_query, 0.8)``; the
        training part is fitted and saved, and the median absolute error on
        the evaluation part is printed. The database session is closed when
        training finishes, whether it succeeded or raised.
        """
        try:
            self.load_model()
            if self._decision_tree is None:
                self._decision_tree = RandomForestRegressor(n_estimators=1000)

            db_query = self._database_session.query(PostgamePitcherGameEntry)
            mlb_training_data, mlb_evaluation_data = self.get_train_eval_data(
                db_query, 0.8)
            x_train, y_train = self.get_stochastic_batch(mlb_training_data)
            self._decision_tree.fit(x_train, np.ravel(y_train))
            self.save_model()

            x_eval, y_eval = self.get_stochastic_batch(mlb_evaluation_data)
            y_eval_predictions = np.array(self._decision_tree.predict(x_eval))
            y_eval = np.array(y_eval)
            print("Pitcher Training Size: %i | Pitcher Evaluation Size: %i" % (
                len(x_train), len(x_eval)))
            print("Pitcher median absolute error: %f" % median_absolute_error(
                y_eval, y_eval_predictions))
        finally:
            self._database_session.close()

    def get_prediction(self, input_data):
        """Return the forest's prediction for a single feature vector."""
        return self._decision_tree.predict([input_data])

    def _estimator_predictions(self, input_data):
        """Return each individual estimator's prediction for one feature vector."""
        # Reshape once into a single-row 2-D sample, outside the loop.
        sample = np.asarray(input_data).reshape(1, -1)
        return [estimator.predict(sample)
                for estimator in self._decision_tree.estimators_]

    def get_prediction_interval(self, input_data, percentile=95):
        """Return the (lower, upper) bounds of the central prediction interval.

        :param input_data: one feature vector
        :param percentile: width of the central interval in percent
        :return: tuple ``(err_down, err_up)`` of percentiles taken over the
            individual estimators' predictions
        """
        preds = self._estimator_predictions(input_data)
        err_down = np.percentile(preds, (100 - percentile) / 2.)
        err_up = np.percentile(preds, 100 - (100 - percentile) / 2.)
        return err_down, err_up

    def get_std_dev(self, input_data):
        """Return the standard deviation of the individual estimators' predictions."""
        return np.std(self._estimator_predictions(input_data))

    def save_model(self):
        """Persist the forest to disk; a failure is reported but not raised."""
        try:
            joblib.dump(self._decision_tree, self._MODEL_FILE)
        except Exception as exc:
            # Best effort: training results stay usable in memory.
            print("Could not save pitcher regression forest: %s" % exc)

    def load_model(self):
        """Restore the forest from disk; a failure leaves the current model as is."""
        try:
            # NOTE: joblib.load unpickles the file -- only load trusted files.
            self._decision_tree = joblib.load(self._MODEL_FILE)
        except Exception as exc:
            # Best effort: a missing or unreadable file means training from scratch.
            print("Could not load pitcher regression forest: %s" % exc)
def __init__(self):
    """Create an unfitted decision tree regressor and open a database session."""
    regressor = DecisionTreeRegressor()
    self._decision_tree = regressor
    session = MlbDatabase().open_session()
    self._database_session = session
from learn.train_network import HitterNetworkTrainer, PitcherNetworkTrainer
from sql.mlb_database import MlbDatabase
from learn.train_regression import *

# Training script: fit the hitter and the pitcher regression forests in turn.
# The trainers are constructed without arguments; this script-level session is
# not passed to them, so it is only kept for compatibility and closed at the end.
databaseSession = MlbDatabase().open_session()

try:
    hitter_regression_trainer = HitterRegressionForestTrainer()
    hitter_regression_trainer.train_network()
    pitcher_regression_trainer = PitcherRegressionForestTrainer()
    pitcher_regression_trainer.train_network()
finally:
    # Previously this session was never closed.
    databaseSession.close()
from sql.mlb_database import MlbDatabase from sql.lineup import LineupEntry from sql.postgame_hitter import PostgameHitterGameEntry from sql.pregame_hitter import PregameHitterGameEntry from sql.postgame_pitcher import PostgamePitcherGameEntry from sql.pregame_pitcher import PregamePitcherGameEntry from datetime import date, timedelta from numpy import array, std, mean database_session = MlbDatabase().open_session() query_results = database_session.query(LineupEntry).filter( LineupEntry.game_date != date.today()) lineup_predicted_salary = 0 lineup_actual_salary = 0 lineup_actual_vector = list() lineup_predicted_vector = list() for query_result in query_results: try: #TODO: fix these gets by using the GameEntry to get the game time and such # TODO: this needs to be altered to accommodate double headers, but is not a big priority lineup_actual_salary += database_session.query( PostgameHitterGameEntry).get( (query_result.catcher, query_result.game_date)).actual_draftkings_points lineup_actual_salary += database_session.query( PostgamePitcherGameEntry).get( (query_result.starting_pitcher_1, query_result.game_date)).actual_draftkings_points lineup_actual_salary += database_session.query( PostgamePitcherGameEntry).get(
from learn.train_network import HitterNetworkTrainer, PitcherNetworkTrainer
from sql.mlb_database import MlbDatabase
from learn.train_regression import HitterRegressionTrainer

# Training script: fit the hitter regression model on a shared session.
mlbDatabase = MlbDatabase()
databaseSession = mlbDatabase.open_session()

try:
    #hitter_network_trainer = HitterNetworkTrainer(databaseSession)
    #hitter_network_trainer.train_network()
    hitter_regression_trainer = HitterRegressionTrainer(databaseSession)
    hitter_regression_trainer.train_network()
    #pitcher_network_trainer = PitcherNetworkTrainer(databaseSession)
    #pitcher_network_trainer.train_network()
finally:
    # The session handed to the trainer was previously never closed.
    databaseSession.close()
from sql.mlb_database import MlbDatabase
from email_service import send_email
from mine.stat_miner import LineupMiner

# Postgame script: mine yesterday's results and e-mail the outcome.
try:
    mlbDatabase = MlbDatabase()
    # The miner is built with every game-specific field left empty.
    miner_arguments = dict(lineup=None,
                           opposing_pitcher=None,
                           game_date=None,
                           game_time=None,
                           db_path=None,
                           is_home=False)
    lineup_miner = LineupMiner(**miner_arguments)
    lineup_miner.mine_yesterdays_results()
    send_email("Mine postgame completed.")
except Exception as e:
    print(e)
    failure_notice = "The predictor generated an exception: {0}".format(e)
    send_email(failure_notice)
def _mine_pregame_pitcher(pitcher, opposing_team, database_session):
    """Mine, predict and commit the pregame entry for one starting pitcher."""
    print("Mining %s." % pitcher.name)
    try:
        pregame_pitcher_entry = get_pitcher_stats(pitcher.rotowire_id,
                                                  pitcher.team,
                                                  opposing_team,
                                                  database_session)
        predict_draftkings_points(pregame_pitcher_entry)
        database_session.add(pregame_pitcher_entry)
        database_session.commit()
    except IntegrityError:
        # Report the teams that were passed to get_pitcher_stats rather than
        # reading them off the entry, which is unbound if the lookup itself
        # raised the IntegrityError.
        print("Attempt to duplicate pitcher entry: %s %s %s" % (pitcher.name,
                                                                pitcher.team,
                                                                opposing_team))
        database_session.rollback()
    except PitcherNotFound as e:
        print(e)


def get_pregame_pitching_stats_wrapper(game):
    """Mine and store the pregame stats of both starting pitchers of a game.

    The away pitcher is processed first, then the home pitcher. Duplicate
    entries (IntegrityError) are rolled back and reported; pitchers that
    cannot be found (PitcherNotFound) are reported and skipped.

    :param game: object exposing ``away_pitcher`` and ``home_pitcher``, each
        with ``name``, ``rotowire_id`` and ``team`` attributes
    """
    database_session = MlbDatabase().open_session()
    try:
        _mine_pregame_pitcher(game.away_pitcher, game.home_pitcher.team,
                              database_session)
        _mine_pregame_pitcher(game.home_pitcher, game.away_pitcher.team,
                              database_session)
    finally:
        # Close the session even when an unexpected exception escapes.
        database_session.close()
def _mine_pregame_hitters(lineup, opposing_pitcher, database_session):
    """Mine, predict and commit the pregame entry of every hitter in a lineup."""
    for current_hitter in lineup:
        pitcher_hand = opposing_pitcher.hand
        print("Mining %s." % current_hitter.name)
        try:
            pregame_hitter_entry = get_hitter_stats(current_hitter.rotowire_id,
                                                    opposing_pitcher.rotowire_id,
                                                    current_hitter.team,
                                                    pitcher_hand,
                                                    database_session)
            pregame_hitter_entry.game_date = date.today()
            pregame_hitter_entry.opposing_team = opposing_pitcher.team
            predict_draftkings_points(pregame_hitter_entry)
            database_session.add(pregame_hitter_entry)
            database_session.commit()
        except HitterNotFound as e:
            print(e)
        except IntegrityError:
            # Report the teams known from the lineup rather than reading them
            # off the entry, which may be unbound or left over from the
            # previous hitter if the lookup itself raised the IntegrityError.
            print("Attempt to duplicate hitter entry: %s %s %s" % (
                current_hitter.name, current_hitter.team, opposing_pitcher.team))
            database_session.rollback()


def get_pregame_hitting_stats_wrapper(game):
    """Mine and store the pregame stats of every hitter in both lineups.

    The away lineup is processed against the home pitcher first, then the
    home lineup against the away pitcher. Hitters that cannot be found
    (HitterNotFound) are reported and skipped; duplicate entries
    (IntegrityError) are rolled back and reported.

    :param game: object exposing ``away_lineup``, ``home_lineup``,
        ``away_pitcher`` and ``home_pitcher``
    """
    database_session = MlbDatabase().open_session()
    try:
        _mine_pregame_hitters(game.away_lineup, game.home_pitcher, database_session)
        _mine_pregame_hitters(game.home_lineup, game.away_pitcher, database_session)
    finally:
        # Close the session even when an unexpected exception escapes.
        database_session.close()
def _mine_pregame_hitters(lineup, opposing_pitcher, database_session):
    """Mine, predict and commit the pregame entry of every hitter in a lineup."""
    for current_hitter in lineup:
        pitcher_hand = opposing_pitcher.hand
        print("Mining %s." % current_hitter.name)
        try:
            pregame_hitter_entry = get_hitter_stats(current_hitter.rotowire_id,
                                                    opposing_pitcher.rotowire_id,
                                                    current_hitter.team,
                                                    pitcher_hand,
                                                    database_session)
            pregame_hitter_entry.game_date = date.today()
            pregame_hitter_entry.opposing_team = opposing_pitcher.team
            predict_draftkings_points(pregame_hitter_entry)
            database_session.add(pregame_hitter_entry)
            database_session.commit()
        except HitterNotFound as e:
            print(e)
        except IntegrityError:
            # Report the teams known from the lineup rather than reading them
            # off the entry, which may be unbound or left over from the
            # previous hitter if the lookup itself raised the IntegrityError.
            print("Attempt to duplicate hitter entry: %s %s %s" % (
                current_hitter.name, current_hitter.team, opposing_pitcher.team))
            database_session.rollback()


def get_pregame_hitting_stats_wrapper(game):
    """Mine and store the pregame stats of every hitter in both lineups.

    The away lineup is processed against the home pitcher first, then the
    home lineup against the away pitcher. Hitters that cannot be found
    (HitterNotFound) are reported and skipped; duplicate entries
    (IntegrityError) are rolled back and reported.

    :param game: object exposing ``away_lineup``, ``home_lineup``,
        ``away_pitcher`` and ``home_pitcher``
    """
    database_session = MlbDatabase().open_session()
    try:
        _mine_pregame_hitters(game.away_lineup, game.home_pitcher, database_session)
        _mine_pregame_hitters(game.home_lineup, game.away_pitcher, database_session)
    finally:
        # Close the session even when an unexpected exception escapes.
        database_session.close()
def add_hitter_entries():
    """
    Populate the test database with hitter entries so that foreign key
    constraints on other test rows can be satisfied
    """
    session = MlbDatabase(DB_PATH).open_session()
    # Positional constructor arguments for each HitterEntry, in insertion order.
    hitters = (
        ('Russell', 'Martin', '8047'),      # catcher
        ('Ryan', 'Zimmerman', '8620'),      # first baseman
        ('Chase', 'Utley', '6508'),         # second baseman
        ('Josh', 'Donaldson', '9862'),      # third baseman
        ('Manny', 'Machado', '11437'),      # shortstop
        ('Melvin', 'Upton Jr.', '7140'),    # outfielder 1
        ('Jose', 'Peraza', '13190'),        # outfielder 2
        ('Nick', 'Markakis', '7618'),       # outfielder 3
    )
    for first_name, last_name, player_id in hitters:
        session.add(HitterEntry(first_name, last_name, player_id))
    session.commit()
    session.close()
from mine.beautiful_soup_helper import get_soup_from_url
from datetime import date, timedelta
from sqlalchemy.exc import IntegrityError
from sqlalchemy.orm.exc import FlushError
import datetime
from sql.umpire import UmpireCareerEntry
from mine.baseball_reference import get_team_info
from mine.team_dict import *
import re
from bs4 import Comment
from sql.team_park import ParkEntry
from sqlalchemy import or_, and_

# Root of the per-month game listing; the season (2017) is part of the URL.
base_url = "http://gd2.mlb.com/components/game/mlb/year_2017/month_"

# NOTE(review): MlbDatabase is not imported in the visible lines -- presumably
# it comes in through the star import above; confirm.
database_session = MlbDatabase().open_session()


def get_game_day_urls(game_date):
    """Return the linescore XML URLs of every game listed for a day.

    The day's listing page is fetched and each anchor whose text starts with
    "gid" is turned into ``<day url>/<anchor text>linescore.xml``. Every URL
    found is also printed.

    :param game_date: object with ``month`` and ``day`` attributes; only
        those two are used, the year comes from ``base_url``
    :return: list of linescore URL strings, in page order
    """
    day_url = "%s%02d/day_%02d" % (base_url, game_date.month, game_date.day)
    anchors = get_soup_from_url(day_url).findAll("a")

    linescore_urls = []
    for anchor in anchors:
        link_text = str(anchor.text).strip()
        if not link_text.startswith("gid"):
            continue
        linescore_url = day_url + "/" + link_text + "linescore.xml"
        linescore_urls.append(linescore_url)
        print(linescore_url)

    return linescore_urls
from sql.mlb_database import MlbDatabase
from mine.rotowire import mine_yesterdays_results

# Postgame script: mine yesterday's results on a single session.
mlbDatabase = MlbDatabase()
databaseSession = mlbDatabase.open_session()

try:
    mine_yesterdays_results(databaseSession)
finally:
    # Close the session even if mining raises.
    databaseSession.close()
def _mine_pregame_pitcher(pitcher, opposing_team, database_session):
    """Mine, predict and commit the pregame entry for one starting pitcher."""
    print("Mining %s." % pitcher.name)
    try:
        pregame_pitcher_entry = get_pitcher_stats(pitcher.rotowire_id,
                                                  pitcher.team,
                                                  opposing_team,
                                                  database_session)
        predict_draftkings_points(pregame_pitcher_entry)
        database_session.add(pregame_pitcher_entry)
        database_session.commit()
    except IntegrityError:
        # Report the teams that were passed to get_pitcher_stats rather than
        # reading them off the entry, which is unbound if the lookup itself
        # raised the IntegrityError.
        print("Attempt to duplicate pitcher entry: %s %s %s" % (pitcher.name,
                                                                pitcher.team,
                                                                opposing_team))
        database_session.rollback()
    except PitcherNotFound as e:
        print(e)


def get_pregame_pitching_stats_wrapper(game):
    """Mine and store the pregame stats of both starting pitchers of a game.

    The away pitcher is processed first, then the home pitcher. Duplicate
    entries (IntegrityError) are rolled back and reported; pitchers that
    cannot be found (PitcherNotFound) are reported and skipped.

    :param game: object exposing ``away_pitcher`` and ``home_pitcher``, each
        with ``name``, ``rotowire_id`` and ``team`` attributes
    """
    database_session = MlbDatabase().open_session()
    try:
        _mine_pregame_pitcher(game.away_pitcher, game.home_pitcher.team,
                              database_session)
        _mine_pregame_pitcher(game.home_pitcher, game.away_pitcher.team,
                              database_session)
    finally:
        # Close the session even when an unexpected exception escapes.
        database_session.close()