import numpy as np
from statsmodels import api as sm
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import log_loss
from sklearn.preprocessing import LabelBinarizer

from statcast.bip import Bip
from statcast.better.sm import BetterGLM, BetterMNLogit
from statcast.better.spark import cross_val_predict
from statcast.tools.plot import plotPrecRec, plotPrecRecMN, plotResiduals


# %% Load 2016 balls in play

bip = Bip(years=(2016, ), n_jobs=-1)

# %% Select model inputs and outcome label

xLabels = ['hit_speed', 'hit_angle', 'sprayAngle']
fancyLabels = ['Exit Velocity', 'Launch Angle', 'Spray Angle']
units = ['mph', 'degrees', 'degrees']
yLabel = 'events'

subData = bip.data.loc[~bip.data['exclude'], xLabels + [yLabel]]

# Event types that count as outs
outs = ['Bunt Groundout', 'Double Play', 'Fielders Choice',
        'Fielders Choice Out', 'Flyout', 'Forceout', 'Grounded Into DP',
        'Groundout', 'Lineout', 'Pop Out', 'Runner Out', 'Sac Bunt',
        'Sac Fly', 'Sac Fly DP', 'Triple Play', 'Bunt Pop Out',
        'Bunt Lineout', 'Sacrifice Bunt DP']
from matplotlib import pyplot as plt

from statcast.bip import Bip
from statcast.tools.plot import correlationPlot
from statcast.better.utils import findTrainSplit


# %% Plot correlation of imputing model

years = (2016, 2015)
labels = ['Exit Velocity', 'Launch Angle', 'Hit Distance']
units = ['mph', 'degrees', 'feet']

for year in years:
    bip = Bip(years=(year, ), n_jobs=-1)

    # Evaluate the imputer on non-excluded, non-imputed balls in play
    testData = bip.data.loc[~bip.data.exclude & ~bip.data.scImputed, :]
    testY = bip.scImputer.createY(testData)
    testYp = bip.scImputer.predictD(testData)

    labelsYr = ['{} {}'.format(label, year) for label in labels]
    figs = correlationPlot(testY, testYp, labels=labelsYr, units=units,
                           ms=0.7)

    for fig, label in zip(figs, labels):
        fig.savefig('{} Correlation {}'.format(label, year))

# %% Plot Tree Curve
# %% Imports

from scipy import stats

from matplotlib import pyplot as plt

from statcast.bip import Bip
from statcast.plot import plotMLBLogos
from statcast.tools.plot import addText


# %% Load 2015 and 2016 balls in play

bip15 = Bip(years=(2015, ), n_jobs=-1)
bip16 = Bip(years=(2016, ), n_jobs=-1)

# %% Plot Correlations

labels = ['hit_speed', 'hit_angle', 'hit_distance_sc']
units = ['mph', 'degrees', 'feet']
fancyLabels = ['Exit Velocity', 'Launch Angle', 'Hit Distance']

for i, (label, unit, fancyLabel) in enumerate(zip(labels, units,
                                                  fancyLabels)):
    # Per-park factor: include the scImputed term only if it is in the formula
    if '(scImputed||home_team)' in bip15.scFactorMdl.formulas[i]:
        x = bip15.scFactorMdl.factors_[label]['home_team']['(Intercept)'] + \
            bip15.scFactorMdl.factors_[label]['home_team']['scImputedFALSE']
        missing15 = False
    else:
        x = bip15.scFactorMdl.factors_[label]['home_team']['(Intercept)']
        missing15 = True

    if '(scImputed||home_team)' in bip16.scFactorMdl.formulas[i]:
        y = bip16.scFactorMdl.factors_[label]['home_team']['(Intercept)'] + \
            bip16.scFactorMdl.factors_[label]['home_team']['scImputedFALSE']
import os
import datetime

import requests

from pyspark import SparkContext

from statcast.bip import Bip


# %% Create Spark Context

sc = SparkContext(appName="post5")

# %% Load data, plot histograms of statcast data

years = (2015, 2016)

for year in years:
    bip = Bip(years=(year, ), n_jobs=sc)
    bip.plotSCHistograms()

# %% Transfer results to S3

instanceID = requests. \
    get('http://169.254.169.254/latest/meta-data/instance-id').text
dtStr = datetime.datetime.utcnow().strftime('%Y-%m-%d--%H-%M-%S')
os.system('aws s3 sync . s3://mf-first-bucket/output/{}/{}'.format(
    instanceID, dtStr))

# %% Stop Spark Context

sc.stop()