def calculateProbsOfEachGameInASeason(csv_file_name, predict):
    """Build one record of normalised outcome probabilities per fixture.

    Args:
        csv_file_name: Path handed to ``epl.getData`` to load the season.
        predict: Callable ``predict(homeIndex, awayIndex, gameIndex)`` that
            returns three weights in the order (home win, tie, away win).
            The weights are rescaled here so that they sum to one.

    Returns:
        A list with one dict per row of the season data. Each dict holds the
        fixture's teams, round number, date and location plus the three
        normalised probabilities as plain Python floats.
    """
    season, _index_to_team, team_to_index, _games_played = epl.getData(
        csv_file_name)

    def describe_fixture(row, home_name):
        away_name = season["Away Team"][row]

        home_weight, tie_weight, away_weight = predict(
            team_to_index[home_name], team_to_index[away_name], row)
        # Rescale so the three outcomes form a probability distribution.
        weight_sum = home_weight + tie_weight + away_weight

        return {
            'Home Team': home_name,
            'Away Team': away_name,
            'Round Number': int(season['Round Number'][row]),
            'Date': season['Date'][row],
            'Location': season['Location'][row],
            'home_win': float(home_weight / weight_sum),
            'tie': float(tie_weight / weight_sum),
            'away_win': float(away_weight / weight_sum),
        }

    return [
        describe_fixture(row, home_name)
        for row, home_name in enumerate(season["Home Team"])
    ]
def createPredictGameFunction(filename):
    """Return a ``predict(homeIndex, awayIndex, index)`` lookup for a season.

    The model built by ``makeModel`` is evaluated once over every input row
    of the season; the returned closure only looks up and rescales the row
    that belongs to the requested game.

    Args:
        filename: Season file passed to ``cdata.getAllMyData`` and
            ``epl.getData``.

    Returns:
        A function taking ``(homeIndex, awayIndex, index)``. Only ``index``
        is used: it selects the model output row, which is divided by its
        own sum and returned as ``(row[0], row[2], row[1])``. Callers in this
        file unpack that as (home win, tie, away win).
    """
    features, targets = cdata.getAllMyData([filename], True, 0)
    # The loaded season is not used below; the call and its four-way
    # unpacking are kept so any side effects and errors stay the same.
    _season, _index_to_team, _team_to_index, _games_played = epl.getData(
        filename)

    network = makeModel(features, features.shape[1], targets.shape[1])
    raw_scores = network.predict(features)

    def predict(homeIndex, awayIndex, index):
        scores = np.array(raw_scores[index])
        scaled = scores / scores.sum()
        first, second, third = scaled[0], scaled[1], scaled[2]
        # Reordered on purpose: the third model output goes in the middle.
        return (first, third, second)

    return predict
# Example #3
# 0
def main(csv_file_name='epl-2021.csv',
         output_file_name='epl-predictions-stats.json'):
    """Simulate a season and write team and game statistics as JSON.

    Steps, in order: load the season with ``epl.getData``, build a game
    predictor, run ``probs.calculateProbs`` with 10**5 simulations, rank the
    teams with ``pagerank.rank`` on the season matrix for week 40, compute
    per-game probabilities, and write everything to ``output_file_name``.

    Args:
        csv_file_name: Season file to analyse.
        output_file_name: Destination of the JSON document. It has two keys:
            ``'teams'`` (one entry per team, ordered by expected points,
            highest first) and ``'games'`` (the per-game probabilities).
    """
    season_model = epl.getData(csv_file_name)
    _season, _names, _team_to_index, _games_played = season_model
    game_predictor = predictions_tensorflow.createPredictGameFunction(
        csv_file_name)
    place_odds, mean_points, team_names = probs.calculateProbs(
        10**5, csv_file_name, game_predictor)

    pagerank_week = 40
    adjacency = np.array(
        epl.getMatrixForSeason(pagerank_week, season_model, None))
    page_ranks = pagerank.rank(adjacency)

    # Highest expected points first; equal points fall back to team index.
    standings = sorted(
        ((points, team) for team, points in enumerate(mean_points)),
        reverse=True)

    team_rows = []
    for expected_points, team in standings:
        finish = place_odds[team]
        team_rows.append({
            "name": team_names[team],
            "probability": list(finish),
            "expected": expected_points,
            # Chance of finishing in one of the first four places.
            "championslegue": finish[0] + finish[1] + finish[2] + finish[3],
            # Chance of finishing in one of the last three places.
            "relegated": finish[-1] + finish[-2] + finish[-3],
            "pagerank": page_ranks[team][0],
        })

    games_data = probs.calculateProbsOfEachGameInASeason(
        csv_file_name, game_predictor)

    with open(output_file_name, 'w') as out:
        payload = {'teams': team_rows, 'games': games_data}
        out.write(json.dumps(payload))
def getAllMyData(
        filenames=(
            # Disabled datasets are kept here, commented out, for reference.
            #"old_data/american-football.csv/nfl-2020.csv",
            #"old_data/american-football.csv/nfl-2019.csv",
            #"old_data/american-football.csv/nfl-2018.csv",
            #"old_data/american-football.csv/nfl-2017.csv",
            #"old_data/american-football.csv/nfl-2016.csv",
            "old_data/bundesliga-2018.csv",
            "old_data/bundesliga-2019.csv",
            "old_data/bundesliga-2020.csv",
            "old_data/efl-championship-2019.csv",
            "old_data/efl-championship-2020.csv",
            "old_data/la-liga-2018.csv",
            "old_data/la-liga-2019.csv",
            #"old_data/la-liga-2020.csv",
            "old_data/ligue-1-2018.csv",
            "old_data/ligue-1-2019.csv",
            "old_data/ligue-1-2020.csv",
            "old_data/serie-a-2017.csv",
            "old_data/serie-a-2018.csv",
            "old_data/serie-a-2019.csv",
            "old_data/serie-a-2020.csv",
            "old_data/turkey-super-lig-2019.csv",
            "old_data/turkey-super-lig-2020.csv",
            "old_data/epl-2009.csv",
            "old_data/epl-2010.csv",
            "old_data/epl-2011.csv",
            "old_data/epl-2012.csv",
            "old_data/epl-2013.csv",
            "old_data/epl-2014.csv",
            "old_data/epl-2015.csv",
            "old_data/epl-2016.csv",
            "old_data/epl-2017.csv",
            "old_data/epl-2018.csv",
            "old_data/epl-2019.csv",
            "old_data/epl-2020.csv",
            "epl-2021.csv",
        ),
        getAllInputs=False,
        starting_week=10):
    """Collect model inputs and outputs from every listed season file.

    Each file is processed twice with ``loopThroughData``: once on
    ``epl.getData(filename, False)`` and once on
    ``epl.getData(filename, True)``. The input row of every game from the
    second pass is appended to the matching row of the first pass, so each
    final row is the concatenation of both.

    Args:
        filenames: Iterable of season files to read. The default is an
            immutable tuple so it cannot be altered between calls.
        getAllInputs: Passed on, negated, as the third argument of
            ``loopThroughData``.
            NOTE(review): its exact meaning lives in ``loopThroughData`` --
            confirm there.
        starting_week: Passed as the fourth argument of ``loopThroughData``.

    Returns:
        ``(inputs, outputs)`` as two NumPy arrays covering all files in the
        order given. Outputs come from the first pass only.

    Raises:
        AssertionError: If the two passes over a file produce a different
            number of input rows.
    """

    def buildSharedSteps():
        # Fresh callables on every call so the two passes never share one.
        return [
            addNewInputAndOutput,
            addRankToInputs("A"),
            addRankToInputs("A-week"),
            addRankToInputs("A-G"),
            addRankToInputs("A-G-week"),
            createA("A", 1.0, 0.0, None),
            createA("A-week", 1.0, 0.0, createWeekMultiplier(0.5)),
            createA("A-G", 0.0, 1.0, None),
            createA("A-G-week", 0.0, 1.0, createWeekMultiplier(0.5)),
        ]

    total_inputs = []
    total_outputs = []

    for filename in filenames:

        print("READING ", filename)

        # The first pass additionally appends the averages.
        bothFuncs = buildSharedSteps() + [addAverageToInputs()]
        repeatFuncs = buildSharedSteps()

        model = epl.getData(filename, False)
        data = loopThroughData(model, bothFuncs, not getAllInputs,
                               starting_week)

        inputs = data["inputs"]
        outputs = data["outputs"]

        model = epl.getData(filename, True)
        data = loopThroughData(model, repeatFuncs, not getAllInputs,
                               starting_week)

        inps = data["inputs"]

        assert len(inputs) == len(inps), (
            "both passes over %s must yield the same number of rows" %
            (filename, ))
        for i, extra in enumerate(inps):
            inputs[i] += extra

        total_inputs += inputs
        total_outputs += outputs

    return np.array(total_inputs), np.array(total_outputs)
def predictGames(filename):
    """Print per-game predictions and a projected points table for a season.

    For every row of the season a line is printed with both teams, the score
    (blank when the game has no usable result), the rescaled model outputs in
    the order home / tie / away, the raw output sum, and a score derived from
    how far that sum is from 1.

    Points are then tallied per team: played games count 3 / 1 / 0 as usual,
    unplayed games contribute their expected points. Teams are printed from
    most to fewest points.

    Args:
        filename: Season file passed to ``cdata.getAllMyData`` and
            ``epl.getData``.

    Returns:
        None. All results are written to standard output.
    """
    inputs, labels = cdata.getAllMyData([filename], True, 0)
    data, indexToTeam, teamToIndex, _gamesPlayed = epl.getData(filename)

    model = makeModel(inputs, inputs.shape[1], labels.shape[1])
    outputs = model.predict(inputs)

    print("SHAPE: ", inputs.shape, labels.shape, outputs.shape)

    print(
        "%20s : %s Goals %s %-20s     Home   Tie    Away         Accurate (Should be close to 1 for accuracy)"
        % ("Home Team", " " * 2, " " * 5, "Away Team"))

    indexToPoints = [0.0] * len(indexToTeam)

    for i in range(len(data["Home Team"])):
        homeTeam = data["Home Team"][i]
        awayTeam = data["Away Team"][i]

        # A game counts as played only when its result is text of the form
        # "<home> - <away>"; anything else (e.g. a missing value) is unplayed.
        homeScore = None
        awayScore = None
        result = data["Result"][i]
        if isinstance(result, str):
            parts = result.split("-")
            if len(parts) == 2:
                homeScore = int(parts[0].strip())
                awayScore = int(parts[1].strip())
        played = homeScore is not None

        homeIndex = teamToIndex[homeTeam]
        awayIndex = teamToIndex[awayTeam]

        # Model output order: [0] home win, [1] away win, [2] tie.
        output = np.array(outputs[i])
        outputSum = output.sum()
        p_output = output / outputSum

        if not played:
            # Expected points of an unplayed game.
            indexToPoints[homeIndex] += p_output[0] * 3.0 + p_output[2]
            indexToPoints[awayIndex] += p_output[1] * 3.0 + p_output[2]
        elif homeScore > awayScore:
            indexToPoints[homeIndex] += 3.0
        elif homeScore < awayScore:
            indexToPoints[awayIndex] += 3.0
        else:
            indexToPoints[homeIndex] += 1.0
            indexToPoints[awayIndex] += 1.0

        if played:
            scoreText = "%4d vs %-4d" % (homeScore, awayScore)
        else:
            scoreText = "%s vs %s" % (" " * 4, " " * 4)

        print("%20s : %s : %-20s     %4.2f   %4.2f   %4.2f   %10.2f %8.2f" %
              (homeTeam, scoreText, awayTeam, p_output[0], p_output[2],
               p_output[1], outputSum, 1.0 - (5 * (outputSum - 1.0))**2))

    # Stable sort: teams level on points keep their index order.
    ranking = sorted(range(len(indexToPoints)),
                     key=lambda teamIndex: indexToPoints[teamIndex],
                     reverse=True)
    print("\n\n")
    print("   %-30s %5s" % ("Team", "Points"))
    for place, teamIndex in enumerate(ranking, start=1):
        print("%2d %-30s %5.3f" %
              (place, indexToTeam[teamIndex], indexToPoints[teamIndex]))
def calculateProbs(predictionCount, csv_file_name, predict):
    """Estimate final-table probabilities by simulating the unplayed games.

    Games with a result of the form "<home> - <away>" are scored as played
    (3 points for a win, 1 each for a tie). Every other game is treated as
    unplayed: ``predict`` is asked once for its outcome weights, which are
    rescaled to sum to one and then reused in every simulated season.

    Args:
        predictionCount: Number of seasons to simulate. Must be positive,
            because the finishing counts are divided by it.
        csv_file_name: Season file passed to ``epl.getData``.
        predict: Callable ``predict(homeIndex, awayIndex, gameIndex)``
            returning weights in the order (home win, tie, away win). It is
            assumed to return the same weights for the same game every time.

    Returns:
        A tuple ``(placeProbabilities, expectedPoints, indexToTeam)``:
        ``placeProbabilities[team][place]`` is the share of simulations in
        which ``team`` finished at zero-based ``place``; ``expectedPoints``
        holds actual points plus expected points of the unplayed games;
        ``indexToTeam`` is returned unchanged from ``epl.getData``.
    """
    data, indexToTeam, teamToIndex, _gamesPlayed = epl.getData(csv_file_name)
    teamCount = len(indexToTeam)

    indexToPlaceFinishedToTimesFinished = [[0] * teamCount
                                           for _ in range(teamCount)]
    indexToPoints = [0] * teamCount
    indexToExpectedPoints = np.array([0.0] * teamCount)

    # One entry per unplayed game:
    # (homeIndex, awayIndex, P(home win), P(home win) + P(tie)).
    gamesLeftToPlay = []

    for i in range(len(data["Home Team"])):
        homeIndex = teamToIndex[data["Home Team"][i]]
        awayIndex = teamToIndex[data["Away Team"][i]]

        result = data["Result"][i]
        score = result.split("-") if isinstance(result, str) else ()

        if len(score) != 2:
            # No usable result: fall back to the predicted distribution.
            winnerProb, tieProb, loserProb = predict(homeIndex, awayIndex, i)
            total = winnerProb + tieProb + loserProb
            winnerProb = winnerProb / total
            tieProb = tieProb / total
            loserProb = loserProb / total

            indexToExpectedPoints[homeIndex] += winnerProb * 3 + tieProb
            indexToExpectedPoints[awayIndex] += loserProb * 3 + tieProb

            gamesLeftToPlay.append((homeIndex, awayIndex, float(winnerProb),
                                    float(winnerProb + tieProb)))
            continue

        homeScore = int(score[0].strip())
        awayScore = int(score[1].strip())

        if homeScore > awayScore:
            indexToPoints[homeIndex] += 3
            indexToExpectedPoints[homeIndex] += 3
        elif homeScore < awayScore:
            indexToPoints[awayIndex] += 3
            indexToExpectedPoints[awayIndex] += 3
        else:
            indexToPoints[homeIndex] += 1
            indexToPoints[awayIndex] += 1
            indexToExpectedPoints[homeIndex] += 1
            indexToExpectedPoints[awayIndex] += 1

    for rounds in range(predictionCount):
        if rounds % 1000 == 0:
            print(rounds, (rounds / predictionCount))

        roundIndexToPoints = indexToPoints.copy()
        for homeIndex, awayIndex, homeWinLimit, tieLimit in gamesLeftToPlay:
            # A single draw decides the game. Drawing again for the tie
            # branch would skew the tie and away-win probabilities.
            draw = random.random()
            if draw < homeWinLimit:
                roundIndexToPoints[homeIndex] += 3
            elif draw < tieLimit:
                # A tie gives one point to each side.
                roundIndexToPoints[homeIndex] += 1
                roundIndexToPoints[awayIndex] += 1
            else:
                roundIndexToPoints[awayIndex] += 3

        # Teams level on points are ordered by team index, highest first.
        rankings = sorted(
            ((points, team) for team, points in enumerate(roundIndexToPoints)),
            reverse=True)

        for place, (_points, team) in enumerate(rankings):
            indexToPlaceFinishedToTimesFinished[team][place] += 1

    return (np.array(indexToPlaceFinishedToTimesFinished) / predictionCount,
            indexToExpectedPoints, indexToTeam)