def outcome(mu1, mu2, min, up, outcome):
    """Return the probability of a regulation-time result given the match state.

    The goal difference still to come is modelled as a Skellam variable whose
    two rates are ``mu1`` and ``mu2`` scaled by the fraction of regulation
    time (stoppage included) that remains. Stoppage allowances are read from
    the module-level ``stoppage_1reg`` and ``stoppage_2reg``.

    Args:
        mu1: Goal rate used as the first Skellam parameter.
        mu2: Goal rate used as the second Skellam parameter.
        min: Current minute of play.
        up: Current goal difference in favour of the ``mu1`` side; a draw
            requires the remaining difference to equal ``-up``.
        outcome: ``"draw"`` or ``"lose"``; any other value selects the
            remaining (winning) branch.

    Returns:
        The requested probability. Past minute 90 the result is fixed at 1.0
        for ``"draw"`` and 0.0 for anything else, regardless of ``up``.
    """
    # NOTE: the original author flagged this implementation as "sloppy".
    if min > 90:
        return 1.0 if outcome == "draw" else 0.0

    if min <= 45:
        # First half: both stoppage periods are still ahead.
        minutes_left = (90.0 - min) + stoppage_1reg + stoppage_2reg
    elif min <= 90:
        # Second half: only the second stoppage period remains.
        minutes_left = (90.0 - min) + stoppage_2reg

    fraction_left = minutes_left / (90.0 + stoppage_1reg + stoppage_2reg)

    if outcome == "draw":
        return skellam.pmf(-up, mu1 * fraction_left, mu2 * fraction_left)
    if outcome == "lose":
        return skellam.cdf(-1 - up, mu1 * fraction_left, mu2 * fraction_left)
    # Winning branch: same tail probability with the two rates swapped.
    return skellam.cdf(-1 + up, mu2 * fraction_left, mu1 * fraction_left)
def predict_skellam_1x2(mu1, mu2):
    """Compute 1x2 probabilities (home, draw, away) from Poisson goal rates.

    The goal difference is treated as Skellam(mu1, mu2). The away
    probability is the mass at -1 and below, the draw probability is the mass
    at exactly 0, and the home probability is whatever remains above 0.

    Returns:
        The three probabilities combined with ``np.column_stack`` in the
        order (home, draw, away).
    """
    away_prob = skellam.cdf(-1, mu1=mu1, mu2=mu2)
    draw_or_away_prob = skellam.cdf(0, mu1=mu1, mu2=mu2)
    home_prob = 1.0 - draw_or_away_prob
    draw_prob = draw_or_away_prob - away_prob
    return np.column_stack((home_prob, draw_prob, away_prob))
def overtime(mu1, mu2, min, up, outcome):
    """Return the probability of an extra-time result given the match state.

    The goal difference still to come in extra time is modelled as a Skellam
    variable. Its rates are ``mu1`` and ``mu2`` scaled by the fraction of
    extra time (stoppage included) that remains and by the module-level
    factor ``ot_ft``. Stoppage allowances are read from the module-level
    ``stoppage_1ot`` and ``stoppage_2ot``.

    Args:
        mu1: Goal rate used as the first Skellam parameter.
        mu2: Goal rate used as the second Skellam parameter.
        min: Current minute of play. Before minute 90 the calculation starts
            from the beginning of extra time (minute 90, level score).
        up: Current goal difference in favour of the ``mu1`` side; ignored
            before minute 90.
        outcome: ``"draw"`` or ``"lose"``; any other value selects the
            remaining (winning) branch.

    Returns:
        The requested probability. Once the minute is past 120 no play
        remains, so the result is 1.0 when the current lead already matches
        ``outcome`` and 0.0 otherwise.
    """
    # NOTE: the original author flagged this implementation as "sloppy".
    if min < 90:
        # Extra time has not started: evaluate it from its kick-off state.
        lead, minute = 0, 90
    else:
        lead, minute = up, min

    if minute > 120:
        # Previously this fell through both branches below and raised
        # UnboundLocalError. With no time left the Skellam collapses to a
        # point mass at 0, so the current lead decides the result.
        if outcome == "draw":
            decided = lead == 0
        elif outcome == "lose":
            decided = lead < 0
        else:
            decided = lead > 0
        return 1.0 if decided else 0.0

    if minute <= 105:
        # First extra-time period: both stoppage periods are still ahead.
        minutes_left = (120.0 - minute) + stoppage_1ot + stoppage_2ot
    else:
        # Second extra-time period: only its own stoppage remains.
        minutes_left = (120.0 - minute) + stoppage_2ot

    fraction_left = minutes_left / (30.0 + stoppage_1ot + stoppage_2ot)
    rate_1 = fraction_left * mu1 * ot_ft
    rate_2 = fraction_left * mu2 * ot_ft

    if outcome == "draw":
        return skellam.pmf(-lead, rate_1, rate_2)
    if outcome == "lose":
        return skellam.cdf(-1 - lead, rate_1, rate_2)
    # Winning branch: same tail probability with the two rates swapped.
    return skellam.cdf(-1 + lead, rate_2, rate_1)
def BuildPoissonModels(hist_data, feature_list, comp_data=None):
    '''
    Build score predictions via (linear) poisson regression.

    Two Poisson models are fitted on ``hist_data``, one per team score, each
    using every name in ``feature_list`` as a regressor. The fitted values
    are written back to ``hist_data`` as ``team_1_score_pred`` and
    ``team_2_score_pred`` (the frame is modified in place), and the summary
    of the first fit is printed.

    If ``comp_data`` is given it is modified in place as well: it receives
    the two predicted-score columns plus ``team_1_prob``, ``team_tie_prob``
    and ``team_2_prob``, computed from a Skellam distribution over the
    difference of the two predicted scores.

    Returns:
        ``hist_data`` alone when ``comp_data`` is None, otherwise the tuple
        ``(hist_data, comp_data)``.
    '''
    regressors = " + ".join(feature_list)
    formula_1 = "team_1_score ~ " + regressors
    formula_2 = "team_2_score ~ " + regressors

    # Using the GEE package along with an independence assumption to fit the
    # Poisson models. The original author assumed (unverified) that this
    # amounts to a maximum likelihood approach.
    fam = Poisson()
    ind = Independence()

    # NOTE(review): the score column name is passed as the second positional
    # argument of GEE.from_formula (the grouping variable) - confirm intended.
    model_1 = GEE.from_formula(formula_1, "team_1_score", hist_data,
                               cov_struct=ind, family=fam)
    model_2 = GEE.from_formula(formula_2, "team_2_score", hist_data,
                               cov_struct=ind, family=fam)
    model_1_fit = model_1.fit()
    model_2_fit = model_2.fit()
    print(model_1_fit.summary())

    hist_data['team_1_score_pred'] = model_1_fit.predict(hist_data)
    hist_data['team_2_score_pred'] = model_2_fit.predict(hist_data)

    # Return historical data only if comp_data wasn't passed.
    if comp_data is None:
        return hist_data

    # Prepare comp data.
    comp_data['team_1_score_pred'] = model_1_fit.predict(
        comp_data[feature_list])
    comp_data['team_2_score_pred'] = model_2_fit.predict(
        comp_data[feature_list])

    # skellam broadcasts over arrays, so the three outcome probabilities are
    # computed for every row at once instead of through row-wise apply().
    rate_1 = comp_data['team_1_score_pred'].values
    rate_2 = comp_data['team_2_score_pred'].values
    comp_data['team_1_prob'] = 1 - skellam.cdf(0, rate_1, rate_2)
    comp_data['team_tie_prob'] = skellam.pmf(0, rate_1, rate_2)
    comp_data['team_2_prob'] = skellam.cdf(-1, rate_1, rate_2)

    return hist_data, comp_data
def predict(self):
    """Return a value per team combining win and tie probabilities.

    Expected goals for each side come from the product of one team's
    ``o_rating`` and the other's ``d_rating`` divided by ``PPG``. Unless
    ``self.neutral_field`` is set, half of ``HOMEFIELD_GOAL_ADV`` is added to
    the home side and subtracted from the away side, followed by a further
    shift proportional to the home share of total expected goals. Win and
    loss probabilities are read off a Skellam distribution of the goal
    difference.

    Returns:
        A dict keyed by the two team names, each mapped to
        ``3 * P(win) + P(tie)`` for that team.
    """
    home, away = self.hometeam, self.awayteam
    field_adv = 0 if self.neutral_field else HOMEFIELD_GOAL_ADV / 2

    # Expected goals for the game, including the flat home-field shift.
    home_goals = home.o_rating * away.d_rating / PPG + field_adv
    away_goals = away.o_rating * home.d_rating / PPG - field_adv

    # Second shift, scaled by the home side's share of expected goals.
    extra_shift = home_goals / (home_goals + away_goals) * field_adv
    home_goals = home_goals + extra_shift
    away_goals = away_goals - extra_shift

    # Chances of each result.
    p_home = skellam.sf(0, home_goals, away_goals)
    p_away = skellam.cdf(-1, home_goals, away_goals)
    p_tie = 1 - p_home - p_away

    return {
        home.name: p_home * 3 + p_tie,
        away.name: p_away * 3 + p_tie,
    }
def expected_result(elo_a, elo_b, winning_margin):
    """Return a Skellam-based expectation value for side A.

    The value is ``cdf(winning_margin) + 0.5 * pmf(winning_margin) - 0.3``
    for a Skellam distribution with parameters ``elo_a`` and ``elo_b``.

    Reference cited by the original author:
    https://en.wikipedia.org/wiki/Elo_rating_system#Mathematical_details
    """
    prob_up_to_margin = skellam.cdf(winning_margin, elo_a, elo_b)
    prob_at_margin = skellam.pmf(winning_margin, elo_a, elo_b)
    return (prob_up_to_margin + prob_at_margin * 0.5) - 0.3
def oddspredict2(fixtures, att_params, def_params, hmean, amean):
    """Return one Skellam-based value per fixture.

    For fixture ``j`` the two rates are the mean of ``hmean`` and ``amean``
    multiplied by the attack parameter of one side and the defence parameter
    of the other, where the sides are ``fixtures[j, 0]`` and
    ``fixtures[j, 1]``. The value is the probability that the Skellam
    difference is at most -1 plus half the probability that it is exactly 0.

    Returns:
        A list with one value per row of ``fixtures``, in row order.
    """
    base_rate = (hmean + amean) / 2

    def _fixture_value(row):
        rate_first = (base_rate * att_params[fixtures[row, 0]]
                      * def_params[fixtures[row, 1]])
        rate_second = (base_rate * att_params[fixtures[row, 1]]
                       * def_params[fixtures[row, 0]])
        below_zero = skellam.cdf(-1, rate_first, rate_second)
        at_zero = skellam.pmf(0, rate_first, rate_second)
        return below_zero + at_zero * 0.5

    return [_fixture_value(row) for row in range(len(fixtures))]
def calculateStrategicUtilities(self, passedCandidates, passedElectors, MIN_UTIL, iteration):
    """Recompute and return this elector's strategic utilities.

    Steps, in order:
      1. Count vote intentions with ``GlobalFuncs.countVoteIntentions`` and
         pick this elector's candidate with ``self.chooseCandidate``; one
         vote is subtracted from that candidate's count.
      2. For every ordered pair of distinct candidates, fill
         ``self.tieProbs``, ``self.pivotalityProbs`` and ``self.winnerProbs``
         from a Skellam distribution whose two parameters are the remaining
         vote counts of the pair (diagonal entries are set to 1).
      3. Fill ``self.pivotalities`` from the product of ``winnerProbs`` with
         the pair's row and column removed, times the sum of the pair's
         pivotality and winner probabilities.
      4. Combine the pivotalities with utility differences into
         ``self.newUtilitySum`` and force the entry of the candidate with
         the lowest sincere utility to ``MIN_UTIL``.

    Args:
        passedCandidates: Candidates; only its length is used directly here.
        passedElectors: Electors, forwarded to the vote count.
        MIN_UTIL: Value assigned to the least-preferred candidate's utility.
        iteration: Iteration number; 0 means the sincere utilities are used
            as the previous utilities.

    Returns:
        ``self.strategicUtilities`` (the same object as
        ``self.newUtilitySum`` after this call).
    """
    electorID = self.ID  # not used below
    nCandidates = len(passedCandidates)
    self.allVotes = GlobalFuncs.countVoteIntentions(passedElectors, \
        passedCandidates,iteration)
    self.chosenCandidate = self.chooseCandidate(passedCandidates, iteration)
    # NOTE(review): this binds the same object, so the decrement below also
    # changes self.allVotes if it is mutable - confirm that is intended.
    self.othersVotes = self.allVotes
    self.othersVotes[self.chosenCandidate.ID] = \
        self.othersVotes[self.chosenCandidate.ID] - 1
    for rowIndex in range(0,nCandidates):
        for colIndex in range(0,nCandidates):
            if rowIndex == colIndex:
                self.tieProbs[rowIndex,colIndex] = 1
                self.pivotalityProbs[rowIndex,colIndex] = 1
                self.winnerProbs[rowIndex,colIndex] = 1
            else:
                skellamA = self.othersVotes[rowIndex]
                skellamB = self.othersVotes[colIndex]
                # Zero counts are replaced by a tiny positive value before
                # being used as Skellam parameters.
                if skellamA == 0:
                    skellamA = 10**-100
                if skellamB == 0:
                    skellamB = 10**-100
                # Difference exactly 0, exactly -1, and at least 0.
                self.tieProbs[rowIndex,colIndex] = skellam.pmf(0,skellamA,skellamB)
                self.pivotalityProbs[rowIndex,colIndex] = skellam.pmf(-1,skellamA,skellamB)
                self.winnerProbs[rowIndex,colIndex] = 1 - skellam.cdf(-1,skellamA,skellamB)
    #UNCOMMENT ONLY IN CASE OF PROBLEMS WITH 0 ENTRIES###############
    #for rowIndex in range(0,nCandidates):
    #    for colIndex in range(0,nCandidates):
    #        if math.isnan(self.tieProbs[rowIndex,colIndex]):
    #            self.tieProbs[rowIndex,colIndex] = 0
    #        if math.isnan(self.pivotalityProbs[rowIndex,colIndex]):
    #            self.pivotalityProbs[rowIndex,colIndex] = 0
    #        if math.isnan(self.winnerProbs[rowIndex,colIndex]):
    #            self.winnerProbs[rowIndex,colIndex] = 0
    #################################################################
    for rowIndex in range(0,nCandidates):
        for colIndex in range(0,nCandidates):
            if rowIndex != colIndex:
                # Drop the pair's row, then the pair's column, and multiply
                # everything that is left.
                probsWoutPair = np.delete(self.winnerProbs,rowIndex,0)
                probsWOutPair = np.delete(probsWoutPair,colIndex,1)
                probsProd = np.prod(probsWOutPair)
                otherPivsSum = self.pivotalityProbs[rowIndex,colIndex] + self.winnerProbs[rowIndex,colIndex]
                self.pivotalities[rowIndex,colIndex] = probsProd * otherPivsSum
    # The first iteration starts from the sincere utilities; later ones start
    # from the previous strategic utilities.
    if iteration == 0:
        self.previousUtilities = self.sincereUtilities
    else:
        self.previousUtilities = self.strategicUtilities
    for cand in range(0,nCandidates):
        for otherCand in range(0,nCandidates):
            utilityDiff = self.previousUtilities[cand] - self.sincereUtilities[otherCand]
            self.newUtilDiff[otherCand] = utilityDiff * self.pivotalities[cand,otherCand]
        self.newUtilitySum[cand] = np.sum(self.newUtilDiff)
    # Pin the candidate with the lowest sincere utility to MIN_UTIL.
    self.newUtilitySum[np.argmin(self.sincereUtilities)] = MIN_UTIL
    self.strategicUtilities = self.newUtilitySum
    return self.strategicUtilities
def test_skellam_gh11474():
    """Check ``skellam.cdf(0, mu, mu)`` against reference values from R.

    Regression test for the issue reported in gh-11474, caused by `cdfchn`.
    """
    rates = [1, 10, 100, 1000, 5000, 5050, 5100, 5250, 6000]

    # Reference values generated in R:
    #   library(skellam)
    #   options(digits = 16)
    #   mu = c(1, 10, 100, 1000, 5000, 5050, 5100, 5250, 6000)
    #   pskellam(0, mu, mu, TRUE)
    reference = [0.6542541612768356, 0.5448901559424127,
                 0.5141135799745580, 0.5044605891382528,
                 0.5019947363350450, 0.5019848365953181,
                 0.5019750827993392, 0.5019466621805060,
                 0.5018209330219539]

    computed = skellam.cdf(0, rates, rates)
    assert_allclose(computed, reference)
def BuildPoissonXGBTree(hist_data, feature_list, comp_data=None):
    '''
    Build score predictions via (tree based) poisson regression.

    Two xgboost boosters with the ``count:poisson`` objective are trained on
    ``hist_data``, one per team score, using the columns in ``feature_list``.
    Their in-sample predictions are written back to ``hist_data`` as
    ``team_1_score_pred`` and ``team_2_score_pred`` (the frame is modified in
    place).

    If ``comp_data`` is given it is modified in place as well: it receives
    the two predicted-score columns plus ``team_1_prob``, ``team_tie_prob``
    and ``team_2_prob``, computed from a Skellam distribution over the
    difference of the two predicted scores.

    Returns:
        ``hist_data`` alone when ``comp_data`` is None, otherwise the tuple
        ``(hist_data, comp_data)``.
    '''
    # Plain ndarray instead of np.matrix, which NumPy no longer recommends.
    train_features = hist_data[feature_list].values
    dtrain_1 = xgb.DMatrix(data=train_features,
                           label=np.array(hist_data["team_1_score"]),
                           feature_names=feature_list)
    dtrain_2 = xgb.DMatrix(data=train_features,
                           label=np.array(hist_data["team_2_score"]),
                           feature_names=feature_list)

    # Both boosters use the same settings; each call gets its own copy.
    params = {
        'max_depth': 2,
        'eta': 0.1,
        'silent': 1,
        'objective': 'count:poisson',
        'nthread': 8,
        'eval_metric': 'poisson-nloglik',
    }
    num_round = 100

    # Only the training set is evaluated; no held-out set is built here.
    bst_1 = xgb.train(dict(params), dtrain_1, num_round, [(dtrain_1, 'train')])
    bst_2 = xgb.train(dict(params), dtrain_2, num_round, [(dtrain_2, 'train')])

    hist_data["team_1_score_pred"] = bst_1.predict(dtrain_1)
    hist_data["team_2_score_pred"] = bst_2.predict(dtrain_2)

    if comp_data is None:
        return hist_data

    # Prepare comp data.
    dcomp = xgb.DMatrix(data=comp_data[feature_list].values,
                        feature_names=feature_list)
    comp_data['team_1_score_pred'] = bst_1.predict(dcomp)
    comp_data['team_2_score_pred'] = bst_2.predict(dcomp)

    # skellam broadcasts over arrays, so the three outcome probabilities are
    # computed for every row at once instead of through row-wise apply().
    rate_1 = comp_data['team_1_score_pred'].values
    rate_2 = comp_data['team_2_score_pred'].values
    comp_data['team_1_prob'] = 1 - skellam.cdf(0, rate_1, rate_2)
    comp_data['team_tie_prob'] = skellam.pmf(0, rate_1, rate_2)
    comp_data['team_2_prob'] = skellam.cdf(-1, rate_1, rate_2)

    return hist_data, comp_data
# Plot the pmf on the integers between the 1% and 99% quantiles.
# ``ax``, ``mu1``, ``mu2`` and ``plt`` are expected to be defined earlier in
# the script; they are not set in this fragment.
x = np.arange(skellam.ppf(0.01, mu1, mu2),
              skellam.ppf(0.99, mu1, mu2))
ax.plot(x, skellam.pmf(x, mu1, mu2), 'bo', ms=8, label='skellam pmf')
ax.vlines(x, 0, skellam.pmf(x, mu1, mu2), colors='b', lw=5, alpha=0.5)

# Alternatively, the distribution object can be called (as a function)
# to fix the shape and location. This returns a "frozen" RV object holding
# the given parameters fixed.

# Freeze the distribution and display the frozen ``pmf``:
rv = skellam(mu1, mu2)
ax.vlines(x, 0, rv.pmf(x), colors='k', linestyles='-', lw=1,
          label='frozen pmf')
ax.legend(loc='best', frameon=False)
plt.show()

# Check accuracy of ``cdf`` and ``ppf``:
# (the result of the comparison is not stored or asserted)
prob = skellam.cdf(x, mu1, mu2)
np.allclose(x, skellam.ppf(prob, mu1, mu2))
# True

# Generate random numbers:
r = skellam.rvs(mu1, mu2, size=1000)
def prob_win(n):
    """Tabulate Skellam CDF values for the thresholds 0 through ``n``.

    Both Skellam parameters are the module-level ``new_mean``.

    Returns:
        An array of shape ``(n + 1, 4801)`` whose row ``i`` holds
        ``skellam.cdf(i, new_mean, new_mean)``.
    """
    table = np.zeros((n + 1, 4801))
    for threshold, row in enumerate(table):
        # ``row`` is a view into ``table``, so this fills the table in place.
        row[:] = skellam.cdf(threshold, new_mean, new_mean)
    return table
def bootstrap_result_from_frequency_table(self,freq_table,**kwargs):
    """Flag rows whose strong-minus-weak net differs from the comparison group.

    For every row, "population 2" is the row's own sample and "population 1"
    is the comparison sample with the row's own sample subtracted. Each row
    receives an ``aggregation_value``:

    * ``'N'`` - population 1 is empty (the subset equals the comparison);
    * ``'S'`` - the row's ``sample_size`` is below 5 (overrides ``'N'``);
    * ``'H'`` / ``'L'`` - the observed net is above the 97.5% / below the
      2.5% point of the reference distribution;
    * ``''`` - none of the above.

    The reference distribution is a Skellam whose two means are population
    1's strong and weak rates scaled to population 2's sample size. A
    binomial bootstrap branch exists but is only reached when
    ``use_skellam`` is set to 0, which this method never does.

    Args:
        freq_table: DataFrame with the columns ``sample_size``,
            ``strong_count``, ``weak_count``, ``comp_sample_size``,
            ``comp_strong_count`` and ``comp_weak_count``. It is modified in
            place: helper columns are added to it.
        **kwargs: Accepted but not used.

    Returns:
        DataFrame with the columns ``aggregation_value`` and ``result_type``;
        the ``'S'``/``'N'`` rows come first, then the Skellam rows, then any
        bootstrap rows.

    Raises:
        AssertionError: if ``freq_table`` is not a DataFrame or a required
            column is missing.
    """
    assert isinstance(freq_table, pd.DataFrame)
    df = freq_table
    bootstrap_samples = 5000
    logging.debug('freq_table is\n%s', df.head())

    required_columns = {'sample_size','strong_count','weak_count',
                        'comp_sample_size','comp_strong_count','comp_weak_count'}
    assert required_columns <= set(df.columns)

    df['aggregation_value'] = ''
    df['result_type'] = 'significance_value'

    df['pop_1_sample_size'] = df.comp_sample_size - df.sample_size
    df['pop_1_strong_count'] = df.comp_strong_count - df.strong_count
    df['pop_1_weak_count'] = df.comp_weak_count - df.weak_count
    df['pop_2_sample_size'] = df.sample_size
    df['pop_2_strong_count'] = df.strong_count
    df['pop_2_weak_count'] = df.weak_count

    # ``.loc`` replaces ``.ix``, which was removed in pandas 1.0.
    # 'N' means that the subset is identical to the comparison.
    df.loc[df.pop_1_sample_size == 0, 'aggregation_value'] = 'N'
    df.loc[df.sample_size < 5, 'aggregation_value'] = 'S'

    # Explicit copies: the filtered frames get columns of their own and must
    # not write back into ``df``.
    df_no_agg_value = df.loc[df.aggregation_value == '', :].copy()
    # This effectively ensures that skellam is always used.
    # Change to 0 to sometimes use bootstrap.
    df_no_agg_value['use_skellam'] = 1

    df_skellam = df_no_agg_value.loc[df_no_agg_value.use_skellam == 1].copy()
    if len(df_skellam.index) > 0:
        df_skellam['mu1'] = (df_skellam.pop_1_strong_count / df_skellam.pop_1_sample_size) * df_skellam.pop_2_sample_size
        df_skellam['mu2'] = (df_skellam.pop_1_weak_count / df_skellam.pop_1_sample_size) * df_skellam.pop_2_sample_size
        df_skellam['obs'] = df_skellam.pop_2_strong_count - df_skellam.pop_2_weak_count
        # NOTE(review): a zero strong or weak count in population 1 gives a
        # zero mean here; the resulting p would then not be a valid
        # probability and the row stays unflagged - confirm that is acceptable.
        df_skellam['p'] = pd.Series(
            np.atleast_1d(skellam.cdf(df_skellam.obs.values,
                                      df_skellam.mu1.values,
                                      df_skellam.mu2.values)),
            index=df_skellam.index)
        df_skellam.loc[df_skellam.p > 0.975, 'aggregation_value'] = 'H'
        df_skellam.loc[df_skellam.p < 0.025, 'aggregation_value'] = 'L'

    df_bootstrap = df_no_agg_value.loc[df_no_agg_value.use_skellam == 0].copy()
    for index_item in df_bootstrap.index:
        pop_1_sample_size = df_bootstrap.loc[index_item,'pop_1_sample_size']
        pop_1_strong_count = df_bootstrap.loc[index_item,'pop_1_strong_count']
        pop_1_weak_count = df_bootstrap.loc[index_item,'pop_1_weak_count']
        pop_2_sample_size = df_bootstrap.loc[index_item,'sample_size']
        pop_2_strong_count = df_bootstrap.loc[index_item,'strong_count']
        pop_2_weak_count = df_bootstrap.loc[index_item,'weak_count']

        # Create arrays of strong counts. Degenerate proportions (all or
        # none strong) are repeated as-is instead of being sampled.
        if pop_1_strong_count == pop_1_sample_size or pop_1_strong_count == 0:
            pop_1_rand_strong_counts = [pop_1_strong_count] * bootstrap_samples
        else:
            pop_1_rand_strong_counts = np.random.binomial(pop_1_sample_size,pop_1_strong_count/pop_1_sample_size,bootstrap_samples)
        if pop_2_strong_count == pop_2_sample_size or pop_2_strong_count == 0:
            pop_2_rand_strong_counts = [pop_2_strong_count] * bootstrap_samples
        else:
            pop_2_rand_strong_counts = np.random.binomial(pop_2_sample_size,pop_2_strong_count/pop_2_sample_size,bootstrap_samples)

        # Generate leftover weak percents: the share of weak responses among
        # the responses that are not strong.
        pop_1_leftover_weak_p = 0
        if pop_1_sample_size > pop_1_strong_count:
            pop_1_leftover_weak_p = pop_1_weak_count / ( pop_1_sample_size - pop_1_strong_count )
        pop_2_leftover_weak_p = 0
        if pop_2_sample_size > pop_2_strong_count:
            pop_2_leftover_weak_p = pop_2_weak_count / ( pop_2_sample_size - pop_2_strong_count )

        # Generate weak and net values for each population.
        # NOTE(review): when the leftover weak share is 0, every non-strong
        # response is still counted as weak below - confirm this is intended.
        pop_1_rand_weak_counts = []
        for pop_1_rand_strong in pop_1_rand_strong_counts:
            if pop_1_leftover_weak_p == 0 or pop_1_leftover_weak_p == 1 or pop_1_sample_size == pop_1_rand_strong:
                pop_1_rand_weak_counts.append(pop_1_sample_size - pop_1_rand_strong)
            else:
                pop_1_rand_weak_counts.append(np.random.binomial(pop_1_sample_size - pop_1_rand_strong,pop_1_leftover_weak_p,1))
        pop_2_rand_weak_counts = []
        for pop_2_rand_strong in pop_2_rand_strong_counts:
            if pop_2_leftover_weak_p == 0 or pop_2_leftover_weak_p == 1 or pop_2_sample_size == pop_2_rand_strong:
                pop_2_rand_weak_counts.append(pop_2_sample_size - pop_2_rand_strong)
            else:
                pop_2_rand_weak_counts.append(np.random.binomial(pop_2_sample_size - pop_2_rand_strong,pop_2_leftover_weak_p,1))

        # Assemble nets.
        bs = pd.DataFrame({
            'pop_1_strong':pop_1_rand_strong_counts,
            'pop_1_weak':pop_1_rand_weak_counts,
            'pop_2_strong':pop_2_rand_strong_counts,
            'pop_2_weak':pop_2_rand_weak_counts})
        bs['pop_1_net'] = (bs.pop_1_strong - bs.pop_1_weak) / pop_1_sample_size
        bs['pop_2_net'] = (bs.pop_2_strong - bs.pop_2_weak) / pop_2_sample_size

        # Determine the share of draws in which population 2 has the
        # greater net.
        bs['pop_2_greater'] = 0
        bs.loc[bs.pop_1_net < bs.pop_2_net,'pop_2_greater'] = 1
        pop_2_greater_percent = bs.pop_2_greater.mean()
        if pop_2_greater_percent > 0.975:
            df_bootstrap.loc[index_item,'aggregation_value'] = 'H'
        if pop_2_greater_percent < 0.025:
            df_bootstrap.loc[index_item,'aggregation_value'] = 'L'

    result_columns = ['aggregation_value','result_type']
    df_small = df.loc[df['aggregation_value'].isin(['S','N']), result_columns]
    df_skellam = df_skellam.loc[:, result_columns]
    df_bootstrap = df_bootstrap.loc[:, result_columns]
    logging.debug('df_small is\n%sdf_skellam is\n%sdf_bootstrap is\n%s',
                  df_small.head(), df_skellam.head(), df_bootstrap.head())
    return pd.concat([df_small,df_skellam,df_bootstrap])
def p_stockout(arr_rate, dep_rate, stock, T):
    """Return the Skellam CDF evaluated at ``-stock``.

    The two Skellam parameters are ``T * arr_rate`` and ``T * dep_rate``,
    each floored at 1e-5 (presumably to keep them strictly positive).
    """
    rate_floor = 1e-5
    scaled_arrivals = max(T * arr_rate, rate_floor)
    scaled_departures = max(T * dep_rate, rate_floor)
    return skellam.cdf(-stock, scaled_arrivals, scaled_departures)