示例#1
0
    def runTest(self):
        """Build a postgame hitter entry in memory while a test-database session is open.

        The entry is only constructed and given a rotowire id; nothing in this
        method adds it to the session or commits it.
        """
        session = MlbDatabase(DB_PATH).open_session()

        hitter_entry = PostgameHitterGameEntry()
        hitter_entry.rotowire_id = '10972'

        session.close()
示例#2
0
def add_pitcher_entries():
    """Add two pitcher rows to the test database and commit them in one transaction."""
    session = MlbDatabase(DB_PATH).open_session()

    # Constructor arguments for each PitcherEntry, in insertion order
    # (presumably first name, last name, rotowire id -- confirm against PitcherEntry).
    pitcher_arguments = (
        ('Andrew', 'Cashner', '10468'),
        ('Jeff', 'Samardzija', '9374'),
    )
    for arguments in pitcher_arguments:
        session.add(PitcherEntry(*arguments))

    session.commit()
    session.close()
示例#3
0
def mine_pregame_stats():
    """Mine the game lineups and the pregame hitter/pitcher stats.

    Takes no arguments: the function opens its own database session, uses it
    to fetch the lineups and update ids, then runs the hitting and pitching
    miners over the resulting games. The session is always closed, even when
    one of the mining steps raises.
    """
    database_session = MlbDatabase().open_session()
    try:
        games = get_game_lineups(database_session)
        update_ids(games, database_session)
        # The stat miners only receive the games, not this session.
        get_pregame_hitting_stats(games)
        get_pregame_pitching_stats(games)
    finally:
        database_session.close()
示例#4
0
    def runTest(self):
        """Build a postgame pitcher entry for today's game without persisting it.

        A session on the test database is opened and closed around the
        construction; the entry is never added to it in this method.
        """
        session = MlbDatabase(DB_PATH).open_session()

        pitcher_entry = PostgamePitcherGameEntry()
        pitcher_entry.home_team = CommitNewPregamePitcherTest.HOME_TEAM
        pitcher_entry.rotowire_id = '10468'
        # Both the game date and the game time are derived from today's date.
        pitcher_entry.game_date = date.today()
        pitcher_entry.game_time = date.today().ctime()

        session.close()
示例#5
0
def mine_pregame_stats():
    """Mine the game lineups and the pregame hitter/pitcher stats.

    Takes no arguments: the function opens its own database session, uses it
    to fetch the lineups and update ids, then runs the hitting and pitching
    miners over the resulting games. The session is always closed, even when
    one of the mining steps raises.
    """
    database_session = MlbDatabase().open_session()
    try:
        games = get_game_lineups(database_session)
        update_ids(games, database_session)
        # The stat miners only receive the games, not this session.
        get_pregame_hitting_stats(games)
        get_pregame_pitching_stats(games)
    finally:
        database_session.close()
示例#6
0
    def setUp(self):
        """Prepare the test database: call add_pitcher_entry(), then commit a game entry dated today."""
        add_pitcher_entry()
        session = MlbDatabase(DB_PATH).open_session()

        # Positional arguments for GameEntry, evaluated in the original order.
        game_date = date.today()
        game_time = date.today().ctime()
        home_team = CommitNewPregamePitcherTest.HOME_TEAM
        game_entry = GameEntry(game_date, game_time, home_team, 'MIA')

        session.add(game_entry)
        session.commit()
        session.close()
示例#7
0
    def setUp(self):
        """Prepare the test database: add the hitter and pitcher fixtures, then commit a game entry dated today."""
        add_hitter_entry()
        add_pitcher_entry()
        session = MlbDatabase(DB_PATH).open_session()

        # Positional arguments for GameEntry, evaluated in the original order.
        game_date = date.today()
        game_time = date.today().ctime()
        game_entry = GameEntry(game_date, game_time, 'SD', 'BOS')

        session.add(game_entry)
        session.commit()
        session.close()
示例#8
0
    def get_opponent_vector(self):
        """Return the averaged input vector of the hitters on the other side of this game.

        Queries the pregame hitter entries that share this entry's game date,
        game time and home team but whose ``is_home_team`` flag differs from
        this entry's, sums their raw input vectors, and passes the sum to
        ``PregameHitterGameEntry.avg_input_vector``.

        The session opened here is closed even if the query or the vector
        conversion raises.

        :return: the result of ``PregameHitterGameEntry.avg_input_vector`` for the summed vector
        """
        database_session = MlbDatabase().open_session()
        try:
            opposing_hitter_entries = database_session.query(
                PregameHitterGameEntry).filter(
                    PregameHitterGameEntry.game_date == self.game_date,
                    PregameHitterGameEntry.game_time == self.game_time,
                    PregameHitterGameEntry.home_team == self.home_team,
                    PregameHitterGameEntry.is_home_team != self.is_home_team)

            # Accumulator for the element-wise sum; presumably 31 is the length
            # of to_input_vector_raw() -- confirm against PregameHitterGameEntry.
            hitter_array = np.zeros(31)
            for hitter_entry in opposing_hitter_entries:
                hitter_array += hitter_entry.to_input_vector_raw()
        finally:
            database_session.close()

        return PregameHitterGameEntry.avg_input_vector(hitter_array)
示例#9
0
def add_hitter_entry():
    """Add one hitter row (with team, batting hand and baseball-reference id) to the test database."""
    session = MlbDatabase(DB_PATH).open_session()

    hitter_entry = HitterEntry('Wil', 'Myers', '10972')
    hitter_entry.team = 'SD'
    hitter_entry.batting_hand = 'R'
    hitter_entry.baseball_reference_id = 'myerswi01'

    session.add(hitter_entry)
    session.commit()
    session.close()
示例#10
0
    def runTest(self):
        """Add a pitcher entry with its extra attributes set and commit it to the test database."""
        session = MlbDatabase(DB_PATH).open_session()

        pitcher_entry = PitcherEntry('Andrew', 'Cashner', '10468')
        pitcher_entry.pitching_hand = 'R'
        pitcher_entry.home_team = 'MIA'
        pitcher_entry.baseball_reference_id = 'cashnan01'

        session.add(pitcher_entry)
        session.commit()
        session.close()
示例#11
0
    def runTest(self):
        """Commit a pregame hitter entry for today's game to the test database."""
        session = MlbDatabase(DB_PATH).open_session()

        hitter_entry = PregameHitterGameEntry()
        hitter_entry.home_team = CommitNewPregameHitterTest.HOME_TEAM
        hitter_entry.rotowire_id = '10972'
        # Both the game date and the game time are derived from today's date.
        hitter_entry.game_date = date.today()
        hitter_entry.game_time = date.today().ctime()

        session.add(hitter_entry)
        session.commit()
        session.close()
示例#12
0
    def runTest(self):
        """Commit a lineup entry for today with every roster slot filled in."""
        session = MlbDatabase(DB_PATH).open_session()

        # Roster slot attribute -> id string, applied in the order listed.
        roster_slots = (
            ('starting_pitcher_1', '10468'),
            ('starting_pitcher_2', '9374'),
            ('catcher', '8047'),
            ('first_baseman', '8620'),
            ('second_baseman', '6508'),
            ('third_baseman', '9862'),
            ('shortstop', '11437'),
            ('outfielder_1', '7140'),
            ('outfielder_2', '13190'),
            ('outfielder_3', '7618'),
        )

        lineup_entry = LineupEntry()
        for slot_name, slot_id in roster_slots:
            setattr(lineup_entry, slot_name, slot_id)
        lineup_entry.game_date = date.today()
        lineup_entry.game_time = date.today().ctime()

        session.add(lineup_entry)
        session.commit()
        session.close()
示例#13
0
import os
from sql.mlb_database import MlbDatabase
from mine.rotowire import mine_pregame_stats
from mine.draft_kings import Draftkings
from datetime import date, timedelta, datetime
from email_service import send_email
import cProfile

# Switch to the project checkout before anything else runs.
# NOTE(review): machine-specific absolute path; presumably needed because later
# steps use relative paths -- confirm before running elsewhere.
os.chdir("/home/cameron/workspaces/MlbDatabase/mlb_scrape/Released/mlbscrape_python")

databaseSession = MlbDatabase().open_session()

try:
    # Daily pipeline: mine pregame stats, pull the DraftKings CSV, update
    # salaries, predict points for today, then pick and report a lineup.
    mine_pregame_stats()
    Draftkings.save_daily_csv()
    csv_dict = Draftkings.get_csv_dict()
    Draftkings.update_salaries(databaseSession, csv_dict)
    Draftkings.predict_daily_points(databaseSession, date.today())
    optimal_lineup = Draftkings.get_optimal_lineup(databaseSession, date.today())
    print(optimal_lineup)
    send_email(str(optimal_lineup))
except Exception as e:
    # Top-level boundary: report the failure on the console and by email
    # instead of letting the scheduled run die silently.
    print(e)
    send_email("The predictor generated an exception: {0}".format(e))
finally:
    # Runs even if the failure email itself raises, so the session never leaks.
    databaseSession.close()

示例#14
0
 def __init__(self):
     """Start with no fitted model and open a database session for this instance."""
     self._decision_tree = None
     self._database_session = MlbDatabase().open_session()
示例#15
0
class PitcherRegressionForestTrainer(RegressionForest):
    """Train, persist and query a random-forest regressor for pitcher DraftKings points.

    Each training sample pairs a pregame pitcher entry (features) with the
    matching postgame entry (target ``actual_draftkings_points``). The feature
    vector is the concatenation of the pitcher's own input vector, the
    opponent vector, the park vector and the umpire vector.
    """

    SIZE_TRAINING_BATCH = 900

    # Season whose park factors are looked up for every sample.
    PARK_FACTOR_YEAR = "2016"

    def __init__(self):
        """Open a database session and start with no fitted model."""
        self._database_session = MlbDatabase().open_session()
        self._decision_tree = None

    def get_stochastic_batch(self, input_query, num_samples=None):
        """Build a randomly ordered (x, y) batch from postgame pitcher entries.

        :param input_query: iterable of postgame pitcher entries
        :param num_samples: number of pregame/postgame pairs to draw; ``None``
            (or a value larger than the number of available pairs) uses all of them
        :return: ``(x, y)`` where ``x`` is a list of feature lists and ``y`` a
            list of single-element lists holding the actual DraftKings points
        """
        potential_samples = []
        for postgame_entry in input_query:
            pregame_entry = self._database_session.query(
                PregamePitcherGameEntry).get(
                    (postgame_entry.rotowire_id, postgame_entry.game_date,
                     postgame_entry.game_time))
            if pregame_entry is None:
                print("Can't find %s %s %s %s" % (
                    postgame_entry.rotowire_id, postgame_entry.home_team,
                    postgame_entry.game_date, postgame_entry.game_time))
                continue
            potential_samples.append((pregame_entry, postgame_entry))

        # random.sample raises ValueError when asked for more items than the
        # population holds, so cap the request at what is actually available.
        if num_samples is None or num_samples > len(potential_samples):
            num_samples = len(potential_samples)
        player_samples = random.sample(potential_samples, num_samples)

        x = []
        y = []
        for pregame_entry, postgame_entry in player_samples:
            input_vector = pregame_entry.to_input_vector()

            # Samples without a game entry have no umpire to look up; skip them.
            if pregame_entry.game_entry is None:
                print("NoneType game entry for %s %s %s %s" % (
                    pregame_entry.rotowire_id, pregame_entry.home_team,
                    pregame_entry.game_date, pregame_entry.game_time))
                continue

            umpire_vector = self._get_umpire_vector(
                pregame_entry.game_entry.umpire)
            park_vector = self._get_park_vector(pregame_entry.home_team)

            final_pitcher_array = np.concatenate([
                input_vector,
                pregame_entry.get_opponent_vector(), park_vector, umpire_vector
            ])
            x.append(final_pitcher_array.tolist())
            y.append([postgame_entry.actual_draftkings_points])

        return x, y

    def _get_umpire_vector(self, umpire):
        """Return the umpire's input vector, or the nominal data when the umpire is unknown."""
        if umpire is not None:
            ump_entry = self._database_session.query(
                UmpireCareerEntry).get(umpire)
            if ump_entry is not None:
                return ump_entry.to_input_vector()
        return UmpireCareerEntry.get_nominal_data(self._database_session)

    def _get_park_vector(self, home_team):
        """Return the park-factor vector for a home team, or [100, 100] when none is stored."""
        park_factors = self._database_session.query(ParkEntry).get(
            (home_team, self.PARK_FACTOR_YEAR))
        if park_factors is None:
            print("Pitcher regression forest: Could not find %s from %s" % (
                home_team, self.PARK_FACTOR_YEAR))
            return np.array([100, 100])
        return park_factors.to_input_vector()

    def train_network(self):
        """Load a saved forest, or fit, save and evaluate a new one.

        When no model could be loaded, all postgame pitcher entries are split
        by ``self.get_train_eval_data(db_query, 0.8)``, a 1000-tree forest is
        fit on the training part and the median absolute error on the
        evaluation part is printed. The database session is closed afterwards
        in every case, including when training raises.
        """
        try:
            self.load_model()
            if self._decision_tree is None:
                self._decision_tree = RandomForestRegressor(n_estimators=1000)
                db_query = self._database_session.query(
                    PostgamePitcherGameEntry)
                mlb_training_data, mlb_evaluation_data = \
                    self.get_train_eval_data(db_query, 0.8)
                x_train, y_train = self.get_stochastic_batch(mlb_training_data)
                self._decision_tree.fit(x_train, np.ravel(y_train))
                self.save_model()
                x_eval, y_eval = self.get_stochastic_batch(mlb_evaluation_data)
                y_eval_predictions = np.array(
                    self._decision_tree.predict(x_eval))
                y_eval = np.array(y_eval)
                print("Pitcher Training Size: %i | Pitcher Evaluation Size: %i" % (
                    len(x_train), len(x_eval)))
                print("Pitcher median absolute error: %f" % median_absolute_error(
                    y_eval, y_eval_predictions))
        finally:
            self._database_session.close()

    def get_prediction(self, input_data):
        """Return the forest's prediction for a single feature vector."""
        return self._decision_tree.predict([input_data])

    def _tree_predictions(self, input_data):
        """Return the prediction of every individual tree for one feature vector."""
        sample = input_data.reshape(1, len(input_data))
        return [tree.predict(sample)
                for tree in self._decision_tree.estimators_]

    def get_prediction_interval(self, input_data, percentile=95):
        """Return the (lower, upper) bounds of the central ``percentile`` % of per-tree predictions.

        :param input_data: feature vector supporting ``reshape`` and ``len``
        :param percentile: width of the central interval in percent
        """
        preds = self._tree_predictions(input_data)
        err_down = np.percentile(preds, (100 - percentile) / 2.)
        err_up = np.percentile(preds, 100 - (100 - percentile) / 2.)
        return err_down, err_up

    def get_std_dev(self, input_data):
        """Return the standard deviation of the per-tree predictions for one feature vector."""
        return np.std(self._tree_predictions(input_data))

    def save_model(self):
        """Persist the forest to 'pitcher_regression_forest.pkl' (best effort)."""
        try:
            joblib.dump(self._decision_tree, 'pitcher_regression_forest.pkl')
        except Exception as e:
            # Saving stays best-effort, but the failure is reported rather
            # than swallowed; KeyboardInterrupt/SystemExit now propagate.
            print("Could not save pitcher regression forest: %s" % e)

    def load_model(self):
        """Restore the forest from 'pitcher_regression_forest.pkl' if it can be read.

        On failure the current ``_decision_tree`` is left untouched.
        """
        try:
            self._decision_tree = joblib.load('pitcher_regression_forest.pkl')
        except Exception as e:
            # Loading stays best-effort, but the failure is reported rather
            # than swallowed; KeyboardInterrupt/SystemExit now propagate.
            print("Could not load pitcher regression forest: %s" % e)
示例#16
0
 def __init__(self):
     """Create an unfitted DecisionTreeRegressor, then open a database session for this instance."""
     tree = DecisionTreeRegressor()
     self._decision_tree = tree
     self._database_session = MlbDatabase().open_session()
示例#17
0
from learn.train_network import HitterNetworkTrainer, PitcherNetworkTrainer
from sql.mlb_database import MlbDatabase
from learn.train_regression import *

# Session opened by this script; the trainers below are constructed without it.
databaseSession = MlbDatabase().open_session()

try:
    # Train the hitter forest first, then the pitcher forest.
    hitter_regression_trainer = HitterRegressionForestTrainer()
    hitter_regression_trainer.train_network()

    pitcher_regression_trainer = PitcherRegressionForestTrainer()
    pitcher_regression_trainer.train_network()
finally:
    # Previously this session was never closed.
    databaseSession.close()

示例#18
0
from sql.mlb_database import MlbDatabase
from sql.lineup import LineupEntry
from sql.postgame_hitter import PostgameHitterGameEntry
from sql.pregame_hitter import PregameHitterGameEntry
from sql.postgame_pitcher import PostgamePitcherGameEntry
from sql.pregame_pitcher import PregamePitcherGameEntry
from datetime import date, timedelta
from numpy import array, std, mean

database_session = MlbDatabase().open_session()

# Every stored lineup except the ones dated today.
query_results = database_session.query(LineupEntry).filter(
    LineupEntry.game_date != date.today())

# Running totals and per-lineup series filled in by the loop that follows.
lineup_predicted_salary = 0
lineup_actual_salary = 0
lineup_actual_vector = []
lineup_predicted_vector = []
for query_result in query_results:
    try:
        #TODO: fix these gets by using the GameEntry to get the game time and such
        # TODO: this needs to be altered to accommodate double headers, but is not a big priority
        lineup_actual_salary += database_session.query(
            PostgameHitterGameEntry).get(
                (query_result.catcher,
                 query_result.game_date)).actual_draftkings_points
        lineup_actual_salary += database_session.query(
            PostgamePitcherGameEntry).get(
                (query_result.starting_pitcher_1,
                 query_result.game_date)).actual_draftkings_points
        lineup_actual_salary += database_session.query(
            PostgamePitcherGameEntry).get(
示例#19
0
from learn.train_network import HitterNetworkTrainer, PitcherNetworkTrainer
from sql.mlb_database import MlbDatabase
from learn.train_regression import HitterRegressionTrainer

mlbDatabase = MlbDatabase()
databaseSession = mlbDatabase.open_session()

try:
    #hitter_network_trainer = HitterNetworkTrainer(databaseSession)
    #hitter_network_trainer.train_network()

    # Only the hitter regression trainer is active; it shares this script's session.
    hitter_regression_trainer = HitterRegressionTrainer(databaseSession)
    hitter_regression_trainer.train_network()

    #pitcher_network_trainer = PitcherNetworkTrainer(databaseSession)
    #pitcher_network_trainer.train_network()
finally:
    # Previously this session was never closed, even on success.
    databaseSession.close()

示例#20
0
from sql.mlb_database import MlbDatabase
from email_service import send_email
from mine.stat_miner import LineupMiner

try:
    mlbDatabase = MlbDatabase()
    # mine_yesterdays_results() is called on a miner built with every
    # lineup-specific argument left empty.
    miner_arguments = dict(lineup=None,
                           opposing_pitcher=None,
                           game_date=None,
                           game_time=None,
                           db_path=None,
                           is_home=False)
    lineup_miner = LineupMiner(**miner_arguments)
    lineup_miner.mine_yesterdays_results()
    send_email("Mine postgame completed.")
except Exception as e:
    # Report the failure on the console and by email.
    print(e)
    failure_message = "The predictor generated an exception: {0}".format(e)
    send_email(failure_message)
示例#21
0
def _mine_pregame_pitcher(pitcher, opposing_team, database_session):
    """Mine, score and commit the pregame entry for one starting pitcher."""
    print("Mining %s." % pitcher.name)
    # Bound up front so the IntegrityError handler never reads an unbound
    # (or previous pitcher's) entry.
    pregame_pitcher_entry = None
    try:
        pregame_pitcher_entry = get_pitcher_stats(pitcher.rotowire_id,
                                                  pitcher.team,
                                                  opposing_team,
                                                  database_session)

        predict_draftkings_points(pregame_pitcher_entry)
        database_session.add(pregame_pitcher_entry)
        database_session.commit()
    except IntegrityError:
        if pregame_pitcher_entry is None:
            # The error came from get_pitcher_stats itself; fall back to the
            # values that were passed to it.
            team, opponent = pitcher.team, opposing_team
        else:
            team = pregame_pitcher_entry.team
            opponent = pregame_pitcher_entry.opposing_team
        print("Attempt to duplicate pitcher entry: %s %s %s" % (
            pitcher.name, team, opponent))
        # Roll back the failed commit so the session can be used again.
        database_session.rollback()
    except PitcherNotFound as e:
        print(e)


def get_pregame_pitching_stats_wrapper(game):
    """Mine and commit the pregame entries for both starting pitchers of a game.

    The away pitcher is processed first, then the home pitcher, each against
    the other pitcher's team. A duplicate entry or an unknown pitcher is
    printed and skipped. The session opened here is closed even when an
    unexpected exception escapes.

    :param game: object exposing ``away_pitcher`` and ``home_pitcher``, each
        with ``name``, ``rotowire_id`` and ``team``
    """
    database_session = MlbDatabase().open_session()
    try:
        _mine_pregame_pitcher(game.away_pitcher, game.home_pitcher.team,
                              database_session)
        _mine_pregame_pitcher(game.home_pitcher, game.away_pitcher.team,
                              database_session)
    finally:
        database_session.close()
示例#22
0
def _mine_pregame_hitters(lineup, opposing_pitcher, database_session):
    """Mine, score and commit the pregame entry of every hitter in one lineup."""
    for current_hitter in lineup:
        print("Mining %s." % current_hitter.name)
        # Reset per hitter so the IntegrityError handler never reports the
        # previous hitter's entry (or an unbound name).
        pregame_hitter_entry = None
        try:
            pregame_hitter_entry = get_hitter_stats(
                current_hitter.rotowire_id, opposing_pitcher.rotowire_id,
                current_hitter.team, opposing_pitcher.hand, database_session)
            pregame_hitter_entry.game_date = date.today()
            pregame_hitter_entry.opposing_team = opposing_pitcher.team
            predict_draftkings_points(pregame_hitter_entry)
            database_session.add(pregame_hitter_entry)
            database_session.commit()
        except HitterNotFound as e:
            print(e)
        except IntegrityError:
            if pregame_hitter_entry is None:
                # The error came from get_hitter_stats itself; fall back to
                # the values known for this hitter.
                team, opponent = current_hitter.team, opposing_pitcher.team
            else:
                team = pregame_hitter_entry.team
                opponent = pregame_hitter_entry.opposing_team
            print("Attempt to duplicate hitter entry: %s %s %s" % (
                current_hitter.name, team, opponent))
            # Roll back the failed commit so the next hitter can be processed.
            database_session.rollback()


def get_pregame_hitting_stats_wrapper(game):
    """Mine and commit the pregame entries for every hitter in both lineups of a game.

    The away lineup is processed against the home pitcher, then the home
    lineup against the away pitcher. Unknown hitters and duplicate entries are
    printed and skipped. The session opened here is closed even when an
    unexpected exception escapes.

    :param game: object exposing ``away_lineup``, ``home_lineup``,
        ``away_pitcher`` and ``home_pitcher``
    """
    database_session = MlbDatabase().open_session()
    try:
        _mine_pregame_hitters(game.away_lineup, game.home_pitcher,
                              database_session)
        _mine_pregame_hitters(game.home_lineup, game.away_pitcher,
                              database_session)
    finally:
        database_session.close()
示例#23
0
def _mine_pregame_hitters(lineup, opposing_pitcher, database_session):
    """Mine, score and commit the pregame entry of every hitter in one lineup."""
    for current_hitter in lineup:
        print("Mining %s." % current_hitter.name)
        # Reset per hitter so the IntegrityError handler never reports the
        # previous hitter's entry (or an unbound name).
        pregame_hitter_entry = None
        try:
            pregame_hitter_entry = get_hitter_stats(current_hitter.rotowire_id,
                                                    opposing_pitcher.rotowire_id,
                                                    current_hitter.team,
                                                    opposing_pitcher.hand,
                                                    database_session)
            pregame_hitter_entry.game_date = date.today()
            pregame_hitter_entry.opposing_team = opposing_pitcher.team
            predict_draftkings_points(pregame_hitter_entry)
            database_session.add(pregame_hitter_entry)
            database_session.commit()
        except HitterNotFound as e:
            print(e)
        except IntegrityError:
            if pregame_hitter_entry is None:
                # The error came from get_hitter_stats itself; fall back to
                # the values known for this hitter.
                team, opponent = current_hitter.team, opposing_pitcher.team
            else:
                team = pregame_hitter_entry.team
                opponent = pregame_hitter_entry.opposing_team
            print("Attempt to duplicate hitter entry: %s %s %s" % (current_hitter.name,
                                                                   team,
                                                                   opponent))
            # Roll back the failed commit so the next hitter can be processed.
            database_session.rollback()


def get_pregame_hitting_stats_wrapper(game):
    """Mine and commit the pregame entries for every hitter in both lineups of a game.

    The away lineup is processed against the home pitcher, then the home
    lineup against the away pitcher. Unknown hitters and duplicate entries are
    printed and skipped. The session opened here is closed even when an
    unexpected exception escapes.

    :param game: object exposing ``away_lineup``, ``home_lineup``,
        ``away_pitcher`` and ``home_pitcher``
    """
    database_session = MlbDatabase().open_session()
    try:
        _mine_pregame_hitters(game.away_lineup, game.home_pitcher, database_session)
        _mine_pregame_hitters(game.home_lineup, game.away_pitcher, database_session)
    finally:
        database_session.close()
示例#24
0
def add_hitter_entries():
    """Populate the test database with eight hitter rows so foreign key constraints are satisfied."""
    session = MlbDatabase(DB_PATH).open_session()

    # Constructor arguments for each HitterEntry, in insertion order:
    # catcher, first/second/third baseman, shortstop, then three outfielders.
    hitter_arguments = (
        ('Russell', 'Martin', '8047'),
        ('Ryan', 'Zimmerman', '8620'),
        ('Chase', 'Utley', '6508'),
        ('Josh', 'Donaldson', '9862'),
        ('Manny', 'Machado', '11437'),
        ('Melvin', 'Upton Jr.', '7140'),
        ('Jose', 'Peraza', '13190'),
        ('Nick', 'Markakis', '7618'),
    )
    for arguments in hitter_arguments:
        session.add(HitterEntry(*arguments))

    session.commit()
    session.close()
示例#25
0
from mine.beautiful_soup_helper import get_soup_from_url
from datetime import date, timedelta
from sqlalchemy.exc import IntegrityError
from sqlalchemy.orm.exc import FlushError
import datetime
from sql.umpire import UmpireCareerEntry
from mine.baseball_reference import get_team_info
from mine.team_dict import *
import re
from bs4 import Comment
from sql.team_park import ParkEntry
from sqlalchemy import or_, and_

# Prefix of the MLB gameday directory listing. The season (2017) is fixed in
# the string; the two-digit month and day are appended by get_game_day_urls.
base_url = "http://gd2.mlb.com/components/game/mlb/year_2017/month_"

# Module-level session, opened once when this module is imported.
database_session = MlbDatabase().open_session()


def get_game_day_urls(game_date):
    """Return the linescore.xml URL of every game linked from one day's gameday index page.

    Only the month and day of ``game_date`` are used; the year comes from the
    module-level ``base_url``. Each URL is printed as it is collected.

    :param game_date: object with ``month`` and ``day`` attributes
    :return: list of URL strings, in page order
    """
    month_part = '%02d' % game_date.month
    day_part = '%02d' % game_date.day
    day_url = base_url + month_part + "/" + "day_" + day_part

    soup = get_soup_from_url(day_url)

    linescore_urls = []
    for anchor in soup.findAll("a"):
        link_text = str(anchor.text).strip()
        # Only links whose text starts with "gid" are treated as games.
        if not link_text.startswith("gid"):
            continue
        linescore_url = day_url + "/" + link_text + "linescore.xml"
        linescore_urls.append(linescore_url)
        print(linescore_url)

    return linescore_urls
示例#26
0
from sql.mlb_database import MlbDatabase
from mine.rotowire import mine_yesterdays_results

mlbDatabase = MlbDatabase()
databaseSession = mlbDatabase.open_session()

try:
    # Mine yesterday's results using this script's session.
    mine_yesterdays_results(databaseSession)
finally:
    # Close the session even when mining raises.
    databaseSession.close()
示例#27
0
def _mine_pregame_pitcher(pitcher, opposing_team, database_session):
    """Mine, score and commit the pregame entry for one starting pitcher."""
    print("Mining %s." % pitcher.name)
    # Bound up front so the IntegrityError handler never reads an unbound
    # (or previous pitcher's) entry.
    pregame_pitcher_entry = None
    try:
        pregame_pitcher_entry = get_pitcher_stats(pitcher.rotowire_id,
                                                  pitcher.team,
                                                  opposing_team,
                                                  database_session)

        predict_draftkings_points(pregame_pitcher_entry)
        database_session.add(pregame_pitcher_entry)
        database_session.commit()
    except IntegrityError:
        if pregame_pitcher_entry is None:
            # The error came from get_pitcher_stats itself; fall back to the
            # values that were passed to it.
            team, opponent = pitcher.team, opposing_team
        else:
            team = pregame_pitcher_entry.team
            opponent = pregame_pitcher_entry.opposing_team
        print("Attempt to duplicate pitcher entry: %s %s %s" % (pitcher.name,
                                                                team,
                                                                opponent))
        # Roll back the failed commit so the session can be used again.
        database_session.rollback()
    except PitcherNotFound as e:
        print(e)


def get_pregame_pitching_stats_wrapper(game):
    """Mine and commit the pregame entries for both starting pitchers of a game.

    The away pitcher is processed first, then the home pitcher, each against
    the other pitcher's team. A duplicate entry or an unknown pitcher is
    printed and skipped. The session opened here is closed even when an
    unexpected exception escapes.

    :param game: object exposing ``away_pitcher`` and ``home_pitcher``, each
        with ``name``, ``rotowire_id`` and ``team``
    """
    database_session = MlbDatabase().open_session()
    try:
        _mine_pregame_pitcher(game.away_pitcher, game.home_pitcher.team, database_session)
        _mine_pregame_pitcher(game.home_pitcher, game.away_pitcher.team, database_session)
    finally:
        database_session.close()