Example No. 1
from scipy.stats import skellam

def outcome(mu1, mu2, min, up, outcome):
    # Probability of the given full-time result for a match in progress.
    # mu1/mu2: full-match Poisson scoring rates; min: current minute;
    # up: team 1's current goal lead; outcome: "draw", "lose", or win.
    # stoppage_1reg / stoppage_2reg (expected stoppage minutes for each
    # half) are module-level globals.

    if min > 90:  # regulation is over: the result is already decided
        return 1.0 if outcome == "draw" else 0.0

    if min <= 45:  # 1st half
        time_r = (90.0 - min) + stoppage_1reg + stoppage_2reg
    elif min <= 90:  # 2nd half
        time_r = (90.0 - min) + stoppage_2reg

    # Fraction of the effective match (including stoppage) still to play.
    ft = time_r / (90.0 + stoppage_1reg + stoppage_2reg)

    if outcome == "draw":
        p = skellam.pmf(-up, mu1 * ft, mu2 * ft)
    elif outcome == "lose":
        p = skellam.cdf(-1 - up, mu1 * ft, mu2 * ft)
    else:
        p = skellam.cdf(-1 + up, mu2 * ft, mu1 * ft)

    return p
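A minimal harness for the function above; the stoppage-time globals are placeholders (the source defines its own values elsewhere):

stoppage_1reg = 2.0  # assumed 1st-half stoppage minutes
stoppage_2reg = 4.0  # assumed 2nd-half stoppage minutes

# Chance that team 1, scoring 1.4 goals per match to the opponent's 1.1
# and trailing by one (up = -1) at minute 60, ends regulation level.
print(outcome(1.4, 1.1, 60, -1, "draw"))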
Example No. 2
def predict_skellam_1x2(mu1, mu2):
    """Get 1x2 probabilities (home, draw, away) for Poisson goal rates (mu1, mu2) using Skellam distribution."""
    p_2 = skellam.cdf(-1, mu1=mu1, mu2=mu2)
    p_x2 = skellam.cdf(0, mu1=mu1, mu2=mu2)
    p_1 = 1.0 - p_x2
    p_x = p_x2 - p_2
    return np.column_stack((p_1, p_x, p_2))
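A quick check of the function above (rates made up); each row of the result is (home, draw, away) and sums to 1:

import numpy as np
from scipy.stats import skellam

probs = predict_skellam_1x2(np.array([1.6]), np.array([1.1]))
print(probs)              # [[p_home, p_draw, p_away]]
print(probs.sum(axis=1))  # [1.]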
Example No. 3
def overtime(mu1, mu2, min, up, outcome):
    # Probability of the given result at the end of extra time.
    # Before minute 90 all of overtime is still to come, so evaluate
    # from a level score at minute 90.  stoppage_1ot / stoppage_2ot
    # (stoppage per extra-time period) and ot_ft (the scoring-rate
    # multiplier for extra time) are module-level globals.

    if min < 90:
        aup = 0
        amin = 90
    else:
        aup = up
        amin = min

    if amin <= 105:  # 1st period of extra time
        time_r = (120.0 - amin) + stoppage_1ot + stoppage_2ot
    elif amin <= 120:  # 2nd period of extra time
        time_r = (120.0 - amin) + stoppage_2ot

    # Fraction of the 30-minute overtime (including stoppage) remaining.
    ft = time_r / (30.0 + stoppage_1ot + stoppage_2ot)

    if outcome == "draw":
        p = skellam.pmf(-aup, ft * mu1 * ot_ft, ft * mu2 * ot_ft)
    elif outcome == "lose":
        p = skellam.cdf(-1 - aup, ft * mu1 * ot_ft, ft * mu2 * ot_ft)
    else:
        p = skellam.cdf(-1 + aup, ft * mu2 * ot_ft, ft * mu1 * ot_ft)

    return p
Example No. 4
def BuildPoissonModels(hist_data, feature_list, comp_data=None):
    ''' Build score predictions via (linear) Poisson regression. '''
    hist_data_1 = hist_data[["team_1_score"] + feature_list]
    hist_data_2 = hist_data[["team_2_score"] + feature_list]

    formula_1 = "team_1_score ~ " + " + ".join(feature_list)
    formula_2 = "team_2_score ~ " + " + ".join(feature_list)

    # Fit the Poisson models with the GEE package under an independence
    # working-correlation structure; with independent observations this
    # reproduces the ordinary Poisson GLM (maximum-likelihood) estimates.
    fam = Poisson()
    ind = Independence()

    # The second positional argument is the grouping column GEE requires;
    # under an independence structure it does not change the point estimates.
    model_1 = GEE.from_formula(formula_1,
                               "team_1_score",
                               hist_data,
                               cov_struct=ind,
                               family=fam)
    model_2 = GEE.from_formula(formula_2,
                               "team_2_score",
                               hist_data,
                               cov_struct=ind,
                               family=fam)

    model_1_fit = model_1.fit()
    model_2_fit = model_2.fit()
    print(model_1_fit.summary())

    hist_data['team_1_score_pred'] = model_1_fit.predict(hist_data)
    hist_data['team_2_score_pred'] = model_2_fit.predict(hist_data)

    # return historical data if comp_data wasn't passed.
    if comp_data is None:
        return hist_data

    # prepare comp data
    comp_data['team_1_score_pred'] = model_1_fit.predict(
        comp_data[feature_list])
    comp_data['team_2_score_pred'] = model_2_fit.predict(
        comp_data[feature_list])

    # Win/draw/loss probabilities from the Skellam distribution of the
    # difference between the two predicted Poisson rates.
    comp_data['team_1_prob'] = comp_data.apply(
        lambda x: 1 - skellam.cdf(0, x['team_1_score_pred'], x['team_2_score_pred']),
        axis=1)
    comp_data['team_tie_prob'] = comp_data.apply(
        lambda x: skellam.pmf(0, x['team_1_score_pred'], x['team_2_score_pred']),
        axis=1)
    comp_data['team_2_prob'] = comp_data.apply(
        lambda x: skellam.cdf(-1, x['team_1_score_pred'], x['team_2_score_pred']),
        axis=1)

    return hist_data, comp_data
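The question in the comment above can be settled directly: with an independence working correlation (and each observation its own group), GEE solves the same score equations as the Poisson GLM, so its coefficients match the maximum-likelihood fit. A minimal sketch on synthetic data (all names and values illustrative, not from the source):

import numpy as np
import pandas as pd
import statsmodels.formula.api as smf
from statsmodels.genmod.generalized_estimating_equations import GEE
from statsmodels.genmod.families import Poisson
from statsmodels.genmod.cov_struct import Independence

rng = np.random.default_rng(0)
toy = pd.DataFrame({"x": rng.normal(size=200)})
toy["y"] = rng.poisson(np.exp(0.3 + 0.5 * toy["x"]))

gee_fit = GEE.from_formula("y ~ x", np.arange(len(toy)), toy,
                           cov_struct=Independence(), family=Poisson()).fit()
glm_fit = smf.glm("y ~ x", data=toy, family=Poisson()).fit()

print(gee_fit.params)
print(glm_fit.params)  # same point estimates, up to solver tolerance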
Example No. 5
    def predict(self):
        """Expected league points for each side (win = 3, draw = 1)."""
        hometeam = self.hometeam
        awayteam = self.awayteam

        # PPG and HOMEFIELD_GOAL_ADV are module-level constants.
        homefield_advantage = 0 if self.neutral_field else HOMEFIELD_GOAL_ADV / 2

        # Expected goals for the game.
        home_exp = hometeam.o_rating * awayteam.d_rating / PPG + homefield_advantage
        away_exp = awayteam.o_rating * hometeam.d_rating / PPG - homefield_advantage

        # Shift a further share of the advantage in proportion to the
        # home side's share of expected scoring.
        home_adv_portion = home_exp / (home_exp + away_exp) * homefield_advantage
        home_exp = home_exp + home_adv_portion
        away_exp = away_exp - home_adv_portion

        # Chances of each result from the Skellam score margin.
        home_win = skellam.sf(0, home_exp, away_exp)    # P(margin > 0)
        away_win = skellam.cdf(-1, home_exp, away_exp)  # P(margin <= -1)
        tie = 1 - home_win - away_win

        return {
            hometeam.name: home_win * 3 + tie,
            awayteam.name: away_win * 3 + tie,
        }
Example No. 6
def expected_result(elo_a, elo_b, winning_margin):
    """
    Skellam analogue of the Elo expected score:
    https://en.wikipedia.org/wiki/Elo_rating_system#Mathematical_details
    Treats the two ratings as Poisson rates and combines
    P(margin <= winning_margin) with half the probability of exactly
    that margin, shifted by a fixed -0.3 offset.
    """
    px = skellam.cdf(winning_margin, elo_a, elo_b)
    pwm = skellam.pmf(winning_margin, elo_a, elo_b)
    expect_a = (px + pwm * 0.5) - 0.3
    return expect_a
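For reference, the standard expected score from the linked Elo page; the function above swaps this logistic curve for a Skellam CDF over the two ratings (sketch only; elo_expected is not in the source):

def elo_expected(rating_a, rating_b):
    # Classic Elo expectation: 1 / (1 + 10**((Rb - Ra) / 400)).
    return 1.0 / (1.0 + 10 ** ((rating_b - rating_a) / 400.0))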
Example No. 7
def oddspredict2(fixtures, att_params, def_params, hmean, amean):
	resultodds = []
	neutralscore = (hmean + amean) / 2
	for j in range(len(fixtures)):
		# Poisson rates from attack/defence multipliers on a neutral baseline.
		lamda = neutralscore * att_params[fixtures[j, 0]] * def_params[fixtures[j, 1]]
		mu = neutralscore * att_params[fixtures[j, 1]] * def_params[fixtures[j, 0]]
		px = skellam.cdf(-1, lamda, mu)   # P(side 1 loses)
		p0 = skellam.pmf(0, lamda, mu)    # P(draw)
		resultodds.append(px + p0 * 0.5)  # loss probability plus half the draw
	return resultodds
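A toy call of the function above, with made-up attack/defence multipliers (all values illustrative):

import numpy as np
from scipy.stats import skellam  # needed in scope by oddspredict2

fixtures = np.array([[0, 1], [1, 0]])  # (side 1 index, side 2 index) per fixture
att_params = np.array([1.2, 0.9])
def_params = np.array([0.8, 1.1])
print(oddspredict2(fixtures, att_params, def_params, 1.5, 1.1))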
Example No. 8
    def calculateStrategicUtilities(self, passedCandidates, passedElectors, MIN_UTIL, iteration):
        nCandidates = len(passedCandidates)
        self.allVotes = GlobalFuncs.countVoteIntentions(passedElectors,
                                                        passedCandidates, iteration)
        self.chosenCandidate = self.chooseCandidate(passedCandidates, iteration)
        # Copy before decrementing, so removing this elector's own vote
        # does not also mutate allVotes through a shared reference.
        self.othersVotes = self.allVotes.copy()
        self.othersVotes[self.chosenCandidate.ID] -= 1
        for rowIndex in range(nCandidates):
            for colIndex in range(nCandidates):
                if rowIndex == colIndex:
                    self.tieProbs[rowIndex, colIndex] = 1
                    self.pivotalityProbs[rowIndex, colIndex] = 1
                    self.winnerProbs[rowIndex, colIndex] = 1
                else:
                    skellamA = self.othersVotes[rowIndex]
                    skellamB = self.othersVotes[colIndex]
                    # Floor zero counts: a zero Skellam rate yields NaNs.
                    if skellamA == 0:
                        skellamA = 10**-100
                    if skellamB == 0:
                        skellamB = 10**-100
                    self.tieProbs[rowIndex, colIndex] = skellam.pmf(0, skellamA, skellamB)
                    self.pivotalityProbs[rowIndex, colIndex] = skellam.pmf(-1, skellamA, skellamB)
                    self.winnerProbs[rowIndex, colIndex] = 1 - skellam.cdf(-1, skellamA, skellamB)
        # UNCOMMENT ONLY IN CASE OF PROBLEMS WITH 0 ENTRIES ##############
        # for rowIndex in range(nCandidates):
        #     for colIndex in range(nCandidates):
        #         if math.isnan(self.tieProbs[rowIndex, colIndex]):
        #             self.tieProbs[rowIndex, colIndex] = 0
        #         if math.isnan(self.pivotalityProbs[rowIndex, colIndex]):
        #             self.pivotalityProbs[rowIndex, colIndex] = 0
        #         if math.isnan(self.winnerProbs[rowIndex, colIndex]):
        #             self.winnerProbs[rowIndex, colIndex] = 0
        ##################################################################
        for rowIndex in range(nCandidates):
            for colIndex in range(nCandidates):
                if rowIndex != colIndex:
                    # Chance that every candidate outside this pair loses,
                    # scaled by the pair's pivotality and win terms.
                    probsWoutPair = np.delete(self.winnerProbs, rowIndex, 0)
                    probsWoutPair = np.delete(probsWoutPair, colIndex, 1)
                    probsProd = np.prod(probsWoutPair)
                    otherPivsSum = (self.pivotalityProbs[rowIndex, colIndex]
                                    + self.winnerProbs[rowIndex, colIndex])
                    self.pivotalities[rowIndex, colIndex] = probsProd * otherPivsSum
        if iteration == 0:
            self.previousUtilities = self.sincereUtilities
        else:
            self.previousUtilities = self.strategicUtilities
        for cand in range(nCandidates):
            for otherCand in range(nCandidates):
                utilityDiff = self.previousUtilities[cand] - self.sincereUtilities[otherCand]
                self.newUtilDiff[otherCand] = utilityDiff * self.pivotalities[cand, otherCand]
            self.newUtilitySum[cand] = np.sum(self.newUtilDiff)
        self.newUtilitySum[np.argmin(self.sincereUtilities)] = MIN_UTIL
        self.strategicUtilities = self.newUtilitySum
        return self.strategicUtilities
Example No. 9
def test_skellam_gh11474():
    # test issue reported in gh-11474 caused by `cdfchn`
    mu = [1, 10, 100, 1000, 5000, 5050, 5100, 5250, 6000]
    cdf = skellam.cdf(0, mu, mu)
    # generated in R
    # library(skellam)
    # options(digits = 16)
    # mu = c(1, 10, 100, 1000, 5000, 5050, 5100, 5250, 6000)
    # pskellam(0, mu, mu, TRUE)
    cdf_expected = [0.6542541612768356, 0.5448901559424127, 0.5141135799745580,
                    0.5044605891382528, 0.5019947363350450, 0.5019848365953181,
                    0.5019750827993392, 0.5019466621805060, 0.5018209330219539]
    assert_allclose(cdf, cdf_expected)
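The expected values follow from symmetry: with mu1 == mu2 the Skellam distribution is symmetric about zero, so P(X <= 0) = 0.5 + pmf(0) / 2. A quick numerical check of that identity:

import numpy as np
from scipy.stats import skellam

mu = np.array([1, 10, 100, 1000])
print(np.allclose(skellam.cdf(0, mu, mu),
                  0.5 + 0.5 * skellam.pmf(0, mu, mu)))  # True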
Example No. 10
def BuildPoissonXGBTree(hist_data, feature_list, comp_data=None):
    ''' Build score predictions via (tree-based) Poisson regression. '''

    dtrain_1 = xgb.DMatrix(data=np.matrix(hist_data[feature_list]),
                           label=np.array(hist_data["team_1_score"]),
                           feature_names=feature_list)

    dtrain_2 = xgb.DMatrix(data=np.matrix(hist_data[feature_list]),
                           label=np.array(hist_data["team_2_score"]),
                           feature_names=feature_list)

    # Identical boosters for the two score models: shallow trees with a
    # Poisson objective and negative log-likelihood as the eval metric.
    params = {
        'max_depth': 2,
        'eta': 0.1,
        'silent': 1,
        'objective': 'count:poisson',
        'nthread': 8,
        'eval_metric': 'poisson-nloglik'
    }

    #evallist_1 = [(dtrain, 'train'), (dtest, 'test')]
    evallist_1 = [(dtrain_1, 'train')]

    #evallist_2 = [(dtrain, 'train'), (dtest, 'test')]
    evallist_2 = [(dtrain_2, 'train')]

    num_round = 100
    bst_1 = xgb.train(params, dtrain_1, num_round, evallist_1)
    bst_2 = xgb.train(params, dtrain_2, num_round, evallist_2)

    ypred_1 = bst_1.predict(dtrain_1)
    ypred_2 = bst_2.predict(dtrain_2)

    hist_data["team_1_score_pred"] = ypred_1
    hist_data["team_2_score_pred"] = ypred_2

    #hist_data[['team_1_score','team_1_score_pred','team_2_score','team_2_score_pred']]
    if comp_data is None:
        return hist_data

    dcomp = xgb.DMatrix(data=np.matrix(comp_data[feature_list]),
                        feature_names=feature_list)

    # prepare comp data
    comp_data['team_1_score_pred'] = bst_1.predict(dcomp)
    comp_data['team_2_score_pred'] = bst_2.predict(dcomp)

    # Win/draw/loss probabilities from the Skellam distribution of the
    # difference between the two predicted Poisson rates.
    comp_data['team_1_prob'] = comp_data.apply(
        lambda x: 1 - skellam.cdf(0, x['team_1_score_pred'], x['team_2_score_pred']),
        axis=1)
    comp_data['team_tie_prob'] = comp_data.apply(
        lambda x: skellam.pmf(0, x['team_1_score_pred'], x['team_2_score_pred']),
        axis=1)
    comp_data['team_2_prob'] = comp_data.apply(
        lambda x: skellam.cdf(-1, x['team_1_score_pred'], x['team_2_score_pred']),
        axis=1)

    return hist_data, comp_data
Example No. 11
# The ``skellam`` example from the scipy.stats docs; setup (imports,
# shape parameters, axes) restored so the snippet runs on its own.
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import skellam

mu1, mu2 = 15, 8
fig, ax = plt.subplots(1, 1)

x = np.arange(skellam.ppf(0.01, mu1, mu2), skellam.ppf(0.99, mu1, mu2))
ax.plot(x, skellam.pmf(x, mu1, mu2), 'bo', ms=8, label='skellam pmf')
ax.vlines(x, 0, skellam.pmf(x, mu1, mu2), colors='b', lw=5, alpha=0.5)

# Alternatively, the distribution object can be called (as a function)
# to fix the shape and location. This returns a "frozen" RV object holding
# the given parameters fixed.

# Freeze the distribution and display the frozen ``pmf``:

rv = skellam(mu1, mu2)
ax.vlines(x,
          0,
          rv.pmf(x),
          colors='k',
          linestyles='-',
          lw=1,
          label='frozen pmf')
ax.legend(loc='best', frameon=False)
plt.show()

# Check accuracy of ``cdf`` and ``ppf``:

prob = skellam.cdf(x, mu1, mu2)
np.allclose(x, skellam.ppf(prob, mu1, mu2))
# True

# Generate random numbers:

r = skellam.rvs(mu1, mu2, size=1000)
Example No. 12
def prob_win(n):
    # new_mean is a module-level array of Poisson rates (length 4801);
    # row i of the result is P(score margin <= i) at every rate.
    z = np.zeros((n + 1, 4801))
    for i in np.arange(n + 1):
        z[i, :] = skellam.cdf(i, new_mean, new_mean)
    return z
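An illustrative call; new_mean must exist at module scope, so a hypothetical grid of Poisson rates is assumed here:

import numpy as np
from scipy.stats import skellam

new_mean = np.linspace(0.5, 24.5, 4801)  # assumed grid of 4801 rates
z = prob_win(3)
print(z.shape)  # (4, 4801)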
Example No. 13
	def bootstrap_result_from_frequency_table(self, freq_table, **kwargs):
		assert isinstance(freq_table, pd.DataFrame)
		df = freq_table
		bootstrap_samples = 5000
		logging.debug('freq_table is\n' + str(df.head()))
		#Testing responses for NCS
		# df_test = df.copy()
		# df_test = df_test.reset_index()
		# logging.debug("Sample of responses for NCS freq_table\n" + str(df_test.ix[df_test['question_code']=='NCS',:].head()))
		# logging.debug("Sample of responses for CSI1 freq_table\n" + str(df_test.ix[df_test['question_code']=='CSI1',:].head()))
		#End testing responses
		assert {'sample_size','strong_count','weak_count','comp_sample_size','comp_strong_count','comp_weak_count'} <= set(df.columns)
		df['aggregation_value'] = ''
		df['result_type'] = 'significance_value'
		df['pop_1_sample_size'] = df.comp_sample_size - df.sample_size
		df['pop_1_strong_count'] = df.comp_strong_count - df.strong_count
		df['pop_1_weak_count'] = df.comp_weak_count - df.weak_count
		df['pop_2_sample_size'] = df.sample_size
		df['pop_2_strong_count'] = df.strong_count
		df['pop_2_weak_count'] = df.weak_count
		df.loc[df.pop_1_sample_size == 0, 'aggregation_value'] = 'N'  # subset is identical to the comparison
		df.loc[df.sample_size < 5, 'aggregation_value'] = 'S'

		df_no_agg_value = df.loc[df.aggregation_value == '', :]
		# dist_1 = pd.DataFrame(poisson.ppf(0.75,df_no_agg_value.pop_2_strong_count), index = df_no_agg_value.index)
		# dist_2 = pd.DataFrame(poisson.ppf(0.75,df_no_agg_value.pop_2_weak_count), index = df_no_agg_value.index)
		# print("df is\n"+ str(df))
		# print(df_no_agg_value.pop_2_strong_count)
		# print(dist_1)
		# print(dist_2)
		df_no_agg_value['use_skellam'] = 1  # Always use the Skellam test; set to 0 to sometimes fall back to the bootstrap
		# df_small = pd.DataFrame(df.ix[df.sample_size < 5,:],columns=['aggregation_value','result_type'])
		# if len(dist_1.index) > 0:
		# 	pass
		# 	# df_no_agg_value['sum_of_count_distributions'] =  dist_1 + dist_2
		# 	# df_no_agg_value.ix[df_no_agg_value.sum_of_count_distributions < (df_no_agg_value.pop_2_sample_size * 1.1),'use_skellam'] = 1
		# else:
		# 	return df_small

		df_skellam = df_no_agg_value.loc[df_no_agg_value.use_skellam == 1]
		if len(df_skellam.index) > 0:
			# Expected strong/weak counts for population 2 under population 1's rates.
			df_skellam['mu1'] = (df_skellam.pop_1_strong_count / df_skellam.pop_1_sample_size) * df_skellam.pop_2_sample_size
			df_skellam['mu2'] = (df_skellam.pop_1_weak_count / df_skellam.pop_1_sample_size) * df_skellam.pop_2_sample_size
			df_skellam['obs'] = df_skellam.pop_2_strong_count - df_skellam.pop_2_weak_count
			df_skellam['p'] = pd.DataFrame(skellam.cdf(df_skellam.obs, df_skellam.mu1, df_skellam.mu2), index=df_skellam.index)
			df_skellam.loc[df_skellam.p > 0.975, 'aggregation_value'] = 'H'
			df_skellam.loc[df_skellam.p < 0.025, 'aggregation_value'] = 'L'

		df_bootstrap = df_no_agg_value.loc[df_no_agg_value.use_skellam == 0]
		for index_item in df_bootstrap.index:
			pop_1_sample_size = df_bootstrap.loc[index_item, 'pop_1_sample_size']

			pop_1_strong_count = df_bootstrap.loc[index_item, 'pop_1_strong_count']
			pop_1_weak_count = df_bootstrap.loc[index_item, 'pop_1_weak_count']

			pop_2_sample_size = df_bootstrap.loc[index_item, 'sample_size']
			pop_2_strong_count = df_bootstrap.loc[index_item, 'strong_count']
			pop_2_weak_count = df_bootstrap.loc[index_item, 'weak_count']

			#Create arrays of strong counts
			pop_1_rand_strong_counts = []

			if pop_1_strong_count == pop_1_sample_size or pop_1_strong_count == 0:
				pop_1_rand_strong_counts = [pop_1_strong_count for i in range(bootstrap_samples)]
			else:
				pop_1_rand_strong_counts = np.random.binomial(pop_1_sample_size,pop_1_strong_count/pop_1_sample_size,bootstrap_samples)

			pop_2_rand_strong_counts = []

			if pop_2_strong_count == pop_2_sample_size or pop_2_strong_count == 0:
				pop_2_rand_strong_counts = [pop_2_strong_count for i in range(bootstrap_samples)]
			else:
				pop_2_rand_strong_counts = np.random.binomial(pop_2_sample_size,pop_2_strong_count/pop_2_sample_size,bootstrap_samples)

			#Generate leftover weak percents
			pop_1_leftover_weak_p = 0
			if pop_1_sample_size > pop_1_strong_count:
				pop_1_leftover_weak_p = pop_1_weak_count / ( pop_1_sample_size - pop_1_strong_count )

			pop_2_leftover_weak_p = 0
			if pop_2_sample_size > pop_2_strong_count:
				pop_2_leftover_weak_p = pop_2_weak_count / ( pop_2_sample_size - pop_2_strong_count )

			#Generate weak and net values for each population
			pop_1_rand_weak_counts = []

			for pop_1_rand_strong in pop_1_rand_strong_counts:
				if pop_1_leftover_weak_p == 0 or pop_1_leftover_weak_p == 1 or pop_1_sample_size == pop_1_rand_strong:
					pop_1_rand_weak_counts.append(pop_1_sample_size - pop_1_rand_strong)
				else:
					pop_1_rand_weak_counts.append(np.random.binomial(pop_1_sample_size - pop_1_rand_strong, pop_1_leftover_weak_p))

			pop_2_rand_weak_counts = []

			for pop_2_rand_strong in pop_2_rand_strong_counts:
				if pop_2_leftover_weak_p == 0 or pop_2_leftover_weak_p == 1 or pop_2_sample_size == pop_2_rand_strong:
					pop_2_rand_weak_counts.append(pop_2_sample_size - pop_2_rand_strong)
				else:
					pop_2_rand_weak_counts.append(np.random.binomial(pop_2_sample_size - pop_2_rand_strong, pop_2_leftover_weak_p))

			#Assemble nets
			bs = pd.DataFrame({
				'pop_1_strong':pop_1_rand_strong_counts,
				'pop_1_weak':pop_1_rand_weak_counts,
				'pop_2_strong':pop_2_rand_strong_counts,
				'pop_2_weak':pop_2_rand_weak_counts})

			bs['pop_1_net'] = (bs.pop_1_strong - bs.pop_1_weak) / pop_1_sample_size
			bs['pop_2_net'] = (bs.pop_2_strong - bs.pop_2_weak) / pop_2_sample_size

			#Determine greater percents
			bs['pop_2_greater'] = 0
			bs.loc[bs.pop_1_net < bs.pop_2_net, 'pop_2_greater'] = 1
			pop_2_greater_percent = bs.pop_2_greater.mean()

			if pop_2_greater_percent > 0.975:
				df_bootstrap.loc[index_item, 'aggregation_value'] = 'H'
			if pop_2_greater_percent < 0.025:
				df_bootstrap.loc[index_item, 'aggregation_value'] = 'L'
		# logging.debug("df_small is\n" + str(df.loc[df.sample_size < 5, :]))
		df_small = pd.DataFrame(df.loc[df['aggregation_value'].isin(['S', 'N']), :], columns=['aggregation_value', 'result_type'])
		df_skellam = pd.DataFrame(df_skellam,columns=['aggregation_value','result_type'])
		df_bootstrap = pd.DataFrame(df_bootstrap,columns=['aggregation_value','result_type'])
		logging.debug('df_small is\n' + str(df_small.head()) + 'df_skellam is\n' + str(df_skellam.head()) +  'df_bootstrap is\n' + str(df_bootstrap.head()))
		return pd.concat([df_small,df_skellam,df_bootstrap])
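Isolated from the dataframe plumbing, the Skellam branch above is a two-sided significance rule on the net (strong minus weak) count. A standalone sketch (function name and counts are illustrative):

from scipy.stats import skellam

def skellam_flag(p1_strong, p1_weak, p1_n, p2_strong, p2_weak, p2_n):
    # Expected strong/weak counts for population 2 under population 1's rates.
    mu1 = (p1_strong / p1_n) * p2_n
    mu2 = (p1_weak / p1_n) * p2_n
    p = skellam.cdf(p2_strong - p2_weak, mu1, mu2)
    if p > 0.975:
        return 'H'  # significantly high net score
    if p < 0.025:
        return 'L'  # significantly low net score
    return ''

print(skellam_flag(40, 20, 100, 30, 5, 50))  # 'H': net well above expectation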
Example No. 14
def p_stockout(arr_rate, dep_rate, stock, T):
    # P(departures exceed arrivals by at least `stock` over horizon T);
    # the 1e-5 floors keep both Skellam rates strictly positive.
    return skellam.cdf(-stock, max(T * arr_rate, 1e-5),
                       max(T * dep_rate, 1e-5))
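A quick illustrative call, treating arrivals and departures as Poisson flows (all numbers made up):

# Probability of exhausting 5 units within 2 hours, expecting
# 3 arrivals and 6 departures per hour.
print(p_stockout(3.0, 6.0, 5, 2.0))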