Пример #1
0
def main():
    """Play a fixed number of self-play games with `new_agent` and report stats.

    Relies on module-level globals: np, Backgammon, new_agent.
    Side effects: prints policy parameters periodically, prints win counts,
    and writes the per-game winner sequence to the file 'Failed.py'.
    """
    # Backgammon reports the winner as 1 or -1; tally wins per player.
    winners = {"1": 0, "-1": 0}  # Collecting stats of the games
    nGames = 1000  # how many games?
    arr = np.zeros(nGames)
    for g in range(nGames):
        winner = Backgammon.play_a_game(commentary=False, net=new_agent)
        winners[str(winner)] += 1
        arr[g] = winner
        # Periodically dump the policy parameters to monitor learning progress.
        if g % 50 == 0:
            print(new_agent.torch_nn_policy.theta)
    # Context manager guarantees the file is closed even if the write raises.
    with open('Failed.py', 'w') as file:
        file.write(np.array_str(arr))
    print("Out of", nGames, "games,")
    print("player", 1, "won", winners["1"], "times and")
    print("player", -1, "won", winners["-1"], "times")
Пример #2
0
def main():
    """Train `agent` over many self-play games, pickle it, and print win stats.

    Relies on module-level globals: np, tqdm, pickle, Backgammon, agent.
    Side effects: saves the trained agent to the file 'saved_net_one_2'.
    """
    winners = {"1": 0, "-1": 0}  # Collecting stats of the games
    nGames = 10000  # how many games?
    arr = np.zeros(nGames)
    for g in tqdm(range(nGames)):
        # Zero eligibility traces before each game (per the TD(lambda)
        # pseudo code the comment in the original referred to).
        agent.actor.zero_el()
        agent.critic.zero_el()

        winner = Backgammon.play_a_game(commentary=False, net=agent)

        winners[str(winner)] += 1
        arr[g] = winner

    # Save the agent; 'wb' because pickle writes bytes. The context manager
    # closes the file even if pickling fails partway.
    with open('saved_net_one_2', 'wb') as file_net:
        pickle.dump(agent, file_net)

    print("Out of", nGames, "games,")
    print("player", 1, "won", winners["1"], "times and")
    print("player", -1, "won", winners["-1"], "times")
def main():
    """Play/train over nGames games, logging a running winrate every 10 games.

    Relies on module-level globals: np, tqdm, pickle, Backgammon, agent, train
    (a boolean flag controlling training and saving).
    Side effects: when train is True, pickles agent.actor.theta to
    the file 'saved_net_one'.
    """
    ranges = 1
    winners = {"1": 0, "-1": 0}  # Collecting stats of the games
    nGames = 1000  # how many games?
    arr = np.zeros(nGames)
    for g in tqdm(range(nGames)):
        winner = Backgammon.play_a_game(commentary=False, net=agent, train=train)
        winners[str(winner)] += 1
        arr[g] = winner
        if g % 10 == 0:
            print(agent.actor.theta)
            # g + 1 games have finished at this point. The original divided
            # by g + 1e-8, which reported a wildly inflated winrate at g == 0.
            print("winrate is %f" % (winners["1"] / (g + 1)))
    # Save only the actor parameters when training is enabled; the context
    # manager replaces the unclosed open()/close() pair.
    if train is True:
        with open('saved_net_one', 'wb') as file_net:
            pickle.dump(agent.actor.theta, file_net)
    print("Out of", ranges, nGames, "games,")
    print("player", 1, "won", winners["1"], "times and")
    print("player", -1, "won", winners["-1"], "times")
Пример #4
0
def evaluate(agent, evaluation_agent, n_eval, n_games):
    """Estimate `agent`'s win-rate (as player 1) over n_eval games.

    Plays n_eval games of agent vs. evaluation_agent, prints the win-rate
    as a percentage (labelled with n_games, the training games so far),
    and returns it rounded to 3 decimal places.
    """
    # play_a_game returns (winner, board); the agent plays as player 1.
    wins = 0
    for _ in range(n_eval):
        outcome, _board = Backgammon.play_a_game(agent, evaluation_agent)
        if outcome == 1:
            wins += 1
    winrate = round(wins / n_eval * 100, 3)
    print(f"Win-rate after training for {n_games} games: {winrate}%")
    return winrate
Пример #5
0
    # NOTE(review): this span is a truncated duplicate of evaluate() above —
    # its `def` line was lost when the examples were scraped, so the names
    # n_eval, agent, evaluation_agent, wins, n_games are unbound here.
    for i in range(n_eval):
        winner, board = Backgammon.play_a_game(agent, evaluation_agent)
        wins += int(winner == 1)  # count games the agent (player 1) won
    winrate = round(wins / n_eval * 100, 3)
    print("Win-rate after training for " + str(n_games) + " games: " +
          str(winrate) + "%")
    return winrate


def train(n_games=200_000, n_epochs=5000, n_eval=1000):
    """Self-play training loop with periodic evaluation against pubeval.

    Every n_epochs games (skipping game 0) the current agent is evaluated
    over n_eval games and the win-rate recorded; the win-rate history is
    plotted via plot_perf when training finishes.
    """
    agent = kotra
    evaluation_agent = pubeval

    winrates = []
    for game_no in range(n_games):
        # Periodic evaluation checkpoint (nothing learned yet at game 0).
        if game_no != 0 and game_no % n_epochs == 0:
            winrates.append(
                evaluate(agent, evaluation_agent, n_eval, n_games=game_no))

        outcome, final_board = Backgammon.play_a_game(
            agent, agent, train=True, train_config={'g': game_no})
        # Credit both perspectives of the same game: the raw board for
        # player 1 and the flipped board for player -1.
        agent.game_over_update(final_board, int(outcome == 1))
        agent.game_over_update(kotra.flip_board(final_board),
                               int(outcome == -1))

    plot_perf(winrates)


# ----- main -----
# Guard the entry point so importing this module does not immediately
# launch a 200,000-game training run.
if __name__ == "__main__":
    train()