示例#1
0
import numpy as np

from statsmodels import api as sm
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import log_loss
from sklearn.preprocessing import LabelBinarizer

from statcast.bip import Bip
from statcast.better.sm import BetterGLM, BetterMNLogit
from statcast.better.spark import cross_val_predict
from statcast.tools.plot import plotPrecRec, plotPrecRecMN, plotResiduals

# %% Load the 2016 data set

bip = Bip(years=(2016, ), n_jobs=-1)

# %% Choose predictor and response columns

# Parallel lists, one entry per predictor: data column name, display name
# and unit.
xLabels = ['hit_speed', 'hit_angle', 'sprayAngle']
fancyLabels = ['Exit Velocity', 'Launch Angle', 'Spray Angle']
units = ['mph', 'degrees', 'degrees']
# Presumably the response column of the model -- confirm against later cells.
yLabel = 'events'

# Keep only the rows whose 'exclude' flag is not set, restricted to the
# predictor columns followed by the response column.
keepRows = ~bip.data['exclude']
keepCols = xLabels + [yLabel]
subData = bip.data.loc[keepRows, keepCols]

outs = [
    'Bunt Groundout', 'Double Play', 'Fielders Choice', 'Fielders Choice Out',
    'Flyout', 'Forceout', 'Grounded Into DP', 'Groundout', 'Lineout',
    'Pop Out', 'Runner Out', 'Sac Bunt', 'Sac Fly', 'Sac Fly DP',
    'Triple Play', 'Bunt Pop Out', 'Bunt Lineout', 'Sacrifice Bunt DP'
示例#2
0
from matplotlib import pyplot as plt

from statcast.bip import Bip
from statcast.tools.plot import correlationPlot
from statcast.better.utils import findTrainSplit

# %% Plot correlation of imputing model

years = (2016, 2015)

# Display names and units of the plotted quantities (parallel lists).
labels = ['Exit Velocity', 'Launch Angle', 'Hit Distance']
units = ['mph', 'degrees', 'feet']

for year in years:
    bip = Bip(years=(year, ), n_jobs=-1)

    # Rows that are neither excluded nor flagged as scImputed.
    keepRows = ~bip.data.exclude & ~bip.data.scImputed
    testData = bip.data.loc[keepRows, :]

    # Outputs of the imputer's createY and predictD for the same rows
    # (presumably observed vs. predicted values -- confirm in scImputer).
    testY = bip.scImputer.createY(testData)
    testYp = bip.scImputer.predictD(testData)

    # Append the year to every display label so each season's figures are
    # labelled distinctly.
    yearSuffix = ' {}'.format(year)
    labelsYr = [label + yearSuffix for label in labels]

    figs = correlationPlot(
        testY,
        testYp,
        labels=labelsYr,
        units=units,
        ms=0.7,
    )

    # One figure per label; the file name carries both label and year.
    for fig, label in zip(figs, labels):
        fileName = '{} Correlation {}'.format(label, year)
        fig.savefig(fileName)

# %% Plot Tree Curve
示例#3
0
# %% Imports

from scipy import stats
from matplotlib import pyplot as plt

from statcast.bip import Bip
from statcast.plot import plotMLBLogos
from statcast.tools.plot import addText

# %%

# One Bip object per season; both are compared label by label in the loop
# below. n_jobs=-1 is passed straight to Bip (presumably "use all cores" --
# confirm against Bip).
bip15 = Bip(years=(2015, ), n_jobs=-1)
bip16 = Bip(years=(2016, ), n_jobs=-1)

# %% Plot Correlations

# Parallel lists that are zipped together in the loop below:
#   labels      -- keys used to index scFactorMdl.factors_
#   units       -- unit string for each label
#   fancyLabels -- display name for each label
labels = ['hit_speed', 'hit_angle', 'hit_distance_sc']
units = ['mph', 'degrees', 'feet']
fancyLabels = ['Exit Velocity', 'Launch Angle', 'Hit Distance']

for i, (label, unit, fancyLabel) in enumerate(zip(labels, units, fancyLabels)):
    if '(scImputed||home_team)' in bip15.scFactorMdl.formulas[i]:
        x = bip15.scFactorMdl.factors_[label]['home_team']['(Intercept)'] + \
            bip15.scFactorMdl.factors_[label]['home_team']['scImputedFALSE']
        missing15 = False
    else:
        x = bip15.scFactorMdl.factors_[label]['home_team']['(Intercept)']
        missing15 = True
    if '(scImputed||home_team)' in bip16.scFactorMdl.formulas[i]:
        y = bip16.scFactorMdl.factors_[label]['home_team']['(Intercept)'] + \
            bip16.scFactorMdl.factors_[label]['home_team']['scImputedFALSE']
示例#4
0
import datetime
import os

import requests
from pyspark import SparkContext

from statcast.bip import Bip

# %% Create Spark Context

# Single SparkContext for the whole script; it is handed to Bip below and
# stopped in the last cell.
sc = SparkContext(appName="post5")

# %% Load data, plot histograms of statcast data

years = (2015, 2016)

# Build one Bip per season, passing the SparkContext as n_jobs, and call its
# histogram-plotting method.
for year in years:
    bip = Bip(years=(year, ), n_jobs=sc)
    bip.plotSCHistograms()

# %% Transfer results to S3

# Ask the link-local instance-metadata endpoint for this machine's instance
# ID. The timeout keeps the script from blocking indefinitely when the
# endpoint is unreachable.
instanceID = requests.get(
    'http://169.254.169.254/latest/meta-data/instance-id', timeout=10).text

# UTC timestamp used as the run's folder name. A timezone-aware "now" is used
# because datetime.utcnow() is deprecated; the formatted string is unchanged.
dtStr = datetime.datetime.now(datetime.timezone.utc) \
    .strftime('%Y-%m-%d--%H-%M-%S')

# Sync the current working directory to output/<instance id>/<timestamp> in
# the bucket. Requires the `os` module to be imported at the top of the
# script and the AWS CLI to be available on PATH.
os.system('aws s3 sync . s3://mf-first-bucket/output/{}/{}'.format(
    instanceID, dtStr))

# %% Stop Spark Context

sc.stop()