def calculateProbsOfEachGameInASeason(csv_file_name, predict):
    """Build one outcome-probability record per fixture of a season file.

    Args:
        csv_file_name: Path handed to ``epl.getData`` to load the fixtures.
        predict: Callable invoked as ``predict(home_index, away_index,
            game_index)``.  It must return three weights which are unpacked
            as (home win, tie, away win) and rescaled here to sum to 1.

    Returns:
        A list with one dict per fixture, in file order, holding the keys
        ``'Home Team'``, ``'Away Team'``, ``'Round Number'`` (as ``int``),
        ``'Date'``, ``'Location'`` and the three probabilities ``'home_win'``,
        ``'tie'`` and ``'away_win'`` (as ``float``).
    """
    season, _index_to_team, team_lookup, _games_played = epl.getData(
        csv_file_name)

    fixtures = []
    for row in range(len(season["Home Team"])):
        home_name = season["Home Team"][row]
        away_name = season["Away Team"][row]
        home_weight, tie_weight, away_weight = predict(
            team_lookup[home_name], team_lookup[away_name], row)

        # Rescale so the three outcomes form a proper distribution.
        weight_sum = home_weight + tie_weight + away_weight
        fixtures.append({
            'Home Team': home_name,
            'Away Team': away_name,
            'Round Number': int(season['Round Number'][row]),
            'Date': season['Date'][row],
            'Location': season['Location'][row],
            'home_win': float(home_weight / weight_sum),
            'tie': float(tie_weight / weight_sum),
            'away_win': float(away_weight / weight_sum),
        })
    return fixtures
def createPredictGameFunction(filename):
    """Return a ``predict(homeIndex, awayIndex, index)`` closure for a season.

    The season's inputs and labels come from ``cdata.getAllMyData``, a model
    is built with ``makeModel`` and its predictions for every input row are
    computed once up front.  The returned closure only looks up the row for
    the requested game.

    Args:
        filename: Season CSV path, passed to ``cdata.getAllMyData`` and
            ``epl.getData``.

    Returns:
        A function taking ``(homeIndex, awayIndex, index)``.  Only ``index``
        is used: the model output row at that position is scaled to sum to 1
        and returned as ``(row[0], row[2], row[1])``.  Callers in this file
        unpack that tuple as (home win, tie, away win).
    """
    features, targets = cdata.getAllMyData([filename], True, 0)
    # The loaded season is not used below; the call itself is kept as-is.
    _season, _index_to_team, _team_lookup, _games_played = epl.getData(
        filename)
    network = makeModel(features, features.shape[1], targets.shape[1])
    raw_predictions = network.predict(features)

    def predict(homeIndex, awayIndex, index):
        """Look up the cached model output for game ``index``."""
        scores = np.array(raw_predictions[index])
        shares = scores / scores.sum()
        # Model columns 1 and 2 are swapped in the returned tuple.
        return (shares[0], shares[2], shares[1])

    return predict
def main(csv_file_name='epl-2021.csv',
         output_file_name='epl-predictions-stats.json'):
    """Compute season statistics and write them to a JSON file.

    Steps:
      1. Load the season with ``epl.getData`` and build a game predictor.
      2. Call ``probs.calculateProbs`` with ``10**5`` as its first argument to
         obtain per-team finishing-place probabilities and mean points.
      3. Rank the season matrix returned by ``epl.getMatrixForSeason`` with
         ``pagerank.rank``.
      4. Collect per-team and per-game records and dump them as JSON.

    Args:
        csv_file_name: Season CSV to analyse.
        output_file_name: Destination of the JSON document, which has the
            top-level keys ``'teams'`` and ``'games'``.
    """
    season_model = epl.getData(csv_file_name)
    _season, _initial_names, _team_lookup, _games_played = season_model
    predictor = predictions_tensorflow.createPredictGameFunction(csv_file_name)
    place_probabilities, mean_points, team_names = probs.calculateProbs(
        10**5, csv_file_name, predictor)

    week = 40
    season_matrix = np.array(
        epl.getMatrixForSeason(week, season_model, None))
    page_ranks = pagerank.rank(season_matrix)

    # Highest mean points first; equal points fall back to the team index.
    ordered = sorted(
        ((mean_points[team], team) for team in range(len(mean_points))),
        reverse=True)

    team_records = []
    for expected, team in ordered:
        finish = place_probabilities[team]
        top_four = finish[0] + finish[1] + finish[2] + finish[3]
        bottom_three = finish[-1] + finish[-2] + finish[-3]
        team_records.append({
            "name": team_names[team],
            "probability": list(finish),
            "expected": expected,
            "championslegue": top_four,
            "relegated": bottom_three,
            "pagerank": page_ranks[team][0],
        })

    game_records = probs.calculateProbsOfEachGameInASeason(
        csv_file_name, predictor)

    document = json.dumps({'teams': team_records, 'games': game_records})
    with open(output_file_name, 'w') as out:
        out.write(document)
def getAllMyData(
        filenames=[
            # Disabled datasets:
            #"old_data/american-football.csv/nfl-2020.csv",
            #"old_data/american-football.csv/nfl-2019.csv",
            #"old_data/american-football.csv/nfl-2018.csv",
            #"old_data/american-football.csv/nfl-2017.csv",
            #"old_data/american-football.csv/nfl-2016.csv",
            "old_data/bundesliga-2018.csv",
            "old_data/bundesliga-2019.csv",
            "old_data/bundesliga-2020.csv",
            "old_data/efl-championship-2019.csv",
            "old_data/efl-championship-2020.csv",
            "old_data/la-liga-2018.csv",
            "old_data/la-liga-2019.csv",
            #"old_data/la-liga-2020.csv",
            "old_data/ligue-1-2018.csv",
            "old_data/ligue-1-2019.csv",
            "old_data/ligue-1-2020.csv",
            "old_data/serie-a-2017.csv",
            "old_data/serie-a-2018.csv",
            "old_data/serie-a-2019.csv",
            "old_data/serie-a-2020.csv",
            "old_data/turkey-super-lig-2019.csv",
            "old_data/turkey-super-lig-2020.csv",
            "old_data/epl-2009.csv",
            "old_data/epl-2010.csv",
            "old_data/epl-2011.csv",
            "old_data/epl-2012.csv",
            "old_data/epl-2013.csv",
            "old_data/epl-2014.csv",
            "old_data/epl-2015.csv",
            "old_data/epl-2016.csv",
            "old_data/epl-2017.csv",
            "old_data/epl-2018.csv",
            "old_data/epl-2019.csv",
            "old_data/epl-2020.csv",
            "epl-2021.csv",
        ],
        getAllInputs=False,
        starting_week=10):
    """Assemble model inputs and outputs from one or more season files.

    Each file is loaded twice through ``epl.getData`` -- once with ``False``
    and once with ``True`` as the second argument (the meaning of that flag
    is defined in ``epl``; TODO confirm) -- and each load is run through
    ``loopThroughData``.  The input rows of the second pass are appended to
    the matching rows of the first pass; outputs are taken from the first
    pass only.

    Args:
        filenames: Season CSV paths to read, in order.
        getAllInputs: Its negation is forwarded as the third argument of
            ``loopThroughData``.
        starting_week: Forwarded as the fourth argument of
            ``loopThroughData``.

    Returns:
        ``(inputs, outputs)`` as two NumPy arrays covering all files.
    """

    def fresh_shared_steps():
        # Called once per pass so every pass gets its own callables from the
        # factories, exactly as when the two lists were spelled out in full.
        return [
            addNewInputAndOutput,
            addRankToInputs("A"),
            addRankToInputs("A-week"),
            addRankToInputs("A-G"),
            addRankToInputs("A-G-week"),
            createA("A", 1.0, 0.0, None),
            createA("A-week", 1.0, 0.0, createWeekMultiplier(0.5)),
            createA("A-G", 0.0, 1.0, None),
            createA("A-G-week", 0.0, 1.0, createWeekMultiplier(0.5)),
        ]

    collected_inputs = []
    collected_outputs = []
    for path in filenames:
        print("READING ", path)

        # The first pass additionally gets the averaging step.
        first_pass_steps = fresh_shared_steps() + [addAverageToInputs()]
        second_pass_steps = fresh_shared_steps()
        third_argument = not getAllInputs

        first_pass = loopThroughData(
            epl.getData(path, False), first_pass_steps, third_argument,
            starting_week)
        rows = first_pass["inputs"]
        labels = first_pass["outputs"]

        second_pass = loopThroughData(
            epl.getData(path, True), second_pass_steps, third_argument,
            starting_week)
        extra_rows = second_pass["inputs"]

        assert (len(rows) == len(extra_rows))
        for row_index in range(len(rows)):
            rows[row_index] += extra_rows[row_index]

        collected_inputs += rows
        collected_outputs += labels

    return np.array(collected_inputs), np.array(collected_outputs)
def predictGames(filename):
    """Print per-game predictions and a points table for one season file.

    For every fixture the model output row is scaled to sum to 1 and printed
    next to the real score when one exists.  Points are accumulated per team:
    real results count 3/1/0, while fixtures without a parsable result add
    the expected points derived from the scaled model output.  Finally the
    teams are printed in descending order of points.

    Args:
        filename: Season CSV path, passed to ``cdata.getAllMyData`` and
            ``epl.getData``.
    """
    features, targets = cdata.getAllMyData([filename], True, 0)
    season, team_names, team_lookup, games_played = epl.getData(filename)
    network = makeModel(features, features.shape[1], targets.shape[1])
    raw_predictions = network.predict(features)

    print("SHAPE: ", features.shape, targets.shape, raw_predictions.shape)
    print(
        "%20s : %s Goals %s %-20s Home Tie Away Accurate (Should be close to 1 for accuracy)"
        % ("Home Team", " " * 2, " " * 5, "Away Team"))

    table = [0.0 for _ in range(len(team_names))]
    for row in range(len(season["Home Team"])):
        home_name = season["Home Team"][row]
        away_name = season["Away Team"][row]

        # A result counts only when it is a string shaped like "<a>-<b>".
        final_score = None
        if type(season["Result"][row]) is str:
            pieces = season["Result"][row].split("-")
            if len(pieces) == 2:
                final_score = (int(pieces[0].strip()), int(pieces[1].strip()))

        home = team_lookup[home_name]
        away = team_lookup[away_name]
        # Not used below; kept so a missing games-played entry still raises.
        _home_played = games_played[home]
        _away_played = games_played[away]

        scores = np.array(raw_predictions[row])
        score_sum = scores.sum()
        shares = scores / score_sum
        # Equals 1 when the raw outputs already sum to 1, lower otherwise.
        closeness = 1.0 - (5 * (score_sum - 1.0))**2

        if final_score is None:
            # No result: add expected points from the scaled outputs.
            table[home] += shares[0] * 3.0 + shares[2]
            table[away] += shares[1] * 3.0 + shares[2]
            print(
                "%20s : %s vs %s : %-20s %4.2f %4.2f %4.2f %10.2f %8.2f" %
                (home_name, " " * 4, " " * 4, away_name, shares[0],
                 shares[2], shares[1], score_sum, closeness))
            continue

        home_goals, away_goals = final_score
        if home_goals > away_goals:
            table[home] += 3.0
        elif home_goals < away_goals:
            table[away] += 3.0
        else:
            table[home] += 1.0
            table[away] += 1.0
        print(
            "%20s : %4d vs %-4d : %-20s %4.2f %4.2f %4.2f %10.2f %8.2f" %
            (home_name, home_goals, away_goals, away_name, shares[0],
             shares[2], shares[1], score_sum, closeness))

    # Stable descending sort: teams level on points keep index order.
    standings = sorted(
        range(len(table)), key=lambda team: table[team], reverse=True)

    print("\n\n")
    print(" %-30s %5s" % ("Team", "Points"))
    for place, team in enumerate(standings, start=1):
        print("%2d %-30s %5.3f" % (place, team_names[team], table[team]))
def _parse_final_score(result):
    """Return ``(home_goals, away_goals)`` for a played game, else ``None``.

    A game counts as played only when ``result`` is a string that splits on
    ``"-"`` into exactly two parts; anything else (for example a missing
    value) means the fixture still has to be played.
    """
    if not isinstance(result, str):
        return None
    parts = result.split("-")
    if len(parts) != 2:
        return None
    return int(parts[0].strip()), int(parts[1].strip())


def _normalized_outcome_probs(weights):
    """Rescale three (home win, tie, away win) weights so they sum to 1."""
    home_win, tie, away_win = weights
    total = home_win + tie + away_win
    return home_win / total, tie / total, away_win / total


def calculateProbs(predictionCount, csv_file_name, predict):
    """Estimate finishing-place probabilities by simulating the open games.

    Points from games that already have a result are fixed.  Every remaining
    game is simulated ``predictionCount`` times from the probabilities given
    by ``predict``, and the table position of each team is counted per
    simulated season.

    Args:
        predictionCount: Number of simulated seasons; should be positive,
            since the place counts are divided by it.
        csv_file_name: Season CSV path, passed to ``epl.getData``.
        predict: Callable invoked as ``predict(home_index, away_index,
            game_index)`` returning three weights for (home win, tie, away
            win).  It is called once per unplayed game and the rescaled
            result is reused for every simulation, so it is expected to be
            deterministic.

    Returns:
        A 3-tuple ``(place_probabilities, expected_points, indexToTeam)``:
        ``place_probabilities[team][place]`` is the fraction of simulations
        in which ``team`` finished at zero-based ``place``;
        ``expected_points[team]`` is the real points so far plus the
        expected points of the unplayed games; ``indexToTeam`` is returned
        unchanged from ``epl.getData``.
    """
    data, indexToTeam, teamToIndex, _games_played = epl.getData(csv_file_name)
    team_count = len(indexToTeam)

    finish_counts = [[0] * team_count for _ in range(team_count)]
    points_so_far = [0] * team_count
    expected_points = np.array([0.0] * team_count)
    # One (home, away, P(home win), P(home win or tie)) entry per open game.
    open_games = []

    for game in range(len(data["Home Team"])):
        home = teamToIndex[data["Home Team"][game]]
        away = teamToIndex[data["Away Team"][game]]
        score = _parse_final_score(data["Result"][game])

        if score is None:
            home_win, tie, away_win = _normalized_outcome_probs(
                predict(home, away, game))
            expected_points[home] += home_win * 3 + tie
            expected_points[away] += away_win * 3 + tie
            open_games.append(
                (home, away, float(home_win), float(home_win + tie)))
            continue

        home_goals, away_goals = score
        if home_goals > away_goals:
            points_so_far[home] += 3
            expected_points[home] += 3
        elif home_goals < away_goals:
            points_so_far[away] += 3
            expected_points[away] += 3
        else:
            points_so_far[home] += 1
            points_so_far[away] += 1
            expected_points[home] += 1
            expected_points[away] += 1

    for simulation in range(predictionCount):
        if simulation % 1000 == 0:
            print(simulation, (simulation / predictionCount))

        season_points = points_so_far.copy()
        for home, away, home_win, home_win_or_tie in open_games:
            # A single draw decides the game.  Drawing a second number for
            # the tie branch would make ties happen with the wrong
            # probability.
            draw = random.random()
            if draw < home_win:
                season_points[home] += 3
            elif draw < home_win_or_tie:
                # A tie is worth one point to each side.
                season_points[home] += 1
                season_points[away] += 1
            else:
                season_points[away] += 3

        # Descending by (points, team index): teams level on points are
        # ordered with the higher index first.
        table = sorted(
            ((points, team) for team, points in enumerate(season_points)),
            reverse=True)
        for place, (_points, team) in enumerate(table):
            finish_counts[team][place] += 1

    return (np.array(finish_counts) / predictionCount, expected_points,
            indexToTeam)