Example #1
 def get_act_afterstates(self, states):
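     # Monte Carlo action selection (as read from the code below): for each candidate
     # action, sample the opponent's hidden piece colors bttl_num times, roll the game
     # out with battle_from, and return the action with the highest mean result.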
     tmp_game = Geister2()
     max_num = self.bttl_num
     mat = np.zeros((len(states), max_num))
     for num in range(max_num):
         for act_i in range(len(states)):
             # copy piece positions, captured flags, and our own colors into tmp_game
             num_red = 0  # number of the opponent's red pieces
             is_vld = []  # indices of the opponent's pieces still on the board
             for i in range(16):
                 tmp_game.units[i].x = self._game.units[i].x
                 tmp_game.units[i].y = self._game.units[i].y
                 tmp_game.units[i].taken = self._game.units[i].taken
                 tmp_game.units[i].color = self._game.units[i].color
                 if (i >= 8):
                     tmp_game.units[i].color = 1  # provisionally treat every opponent piece as blue
                     if (tmp_game.units[i].taken is False):
                         is_vld.append(i)
                     # add 1 to num_red for each opponent red piece, checked against
                     # self._game because tmp_game's color was just overwritten above
                     num_red += 1 if self._game.units[i].color == 3 else 0
             # the opponent's piece colors are not estimated; assign them at random
             self._rnd.shuffle(is_vld)
             for i in is_vld[:num_red]:
                 tmp_game.units[i].color = 3  # mark as an opponent red piece
             tmp_game.on_action_number_received(act_i)
             mat[act_i, num] = battle_from(self.policy,
                                           self.policy,
                                           tmp_game=tmp_game)
     means = mat.mean(axis=1)
     return np.argmax(means)
Example #2
def test():
    seed = 2
    game = Geister2()

    tdagent = MCAgent(game, seed)
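    # hand-crafted value weights: three 7x6 blocks, 126 entries in total, which
    # appears to match the (6*6+6)*3 feature layout; only the first block is non-zero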
    tdagent.w = np.array([
        0.9, 0, 0, 0, 0, 0,
        0.8, 0, 0, 0, 0, 0,
        0.7, 0, 0, 0, 0, 0,
        0.6, 0, 0, 0, 0, 0,
        0.5, 0, 0, 0, 0, 0,
        0.1, 0, 0, 0, 0, 0,
        0,   0, 0, 0, 0, 1,

        0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0,

        0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0
    ])

    rndagent = RandomAgent(game, seed)
    agents = (tdagent, rndagent)
    arr0, arr1 = (agent.init_red() for agent in agents)
    game.setRed(arr0)
    game.changeSide()
    game.setRed(arr1)
    game.changeSide()
    game.printBoard()
    player = 0
    while not game.is_ended():
        agent = agents[player]
        states = game.after_states()
        i_act = agent.get_act_afterstates(states)
        game.on_action_number_received(i_act)
        if player == 0:
            game.printBoard()
        game.changeSide()

        player = (player+1) % 2
Example #3
def battle2():
    seed = 301
    bttl_num = 50
    game = Geister2()
    agents = [[MCAgent(game, seed + i) for i in range(8)],
              [MCAgent(game, seed + i + 8) for i in range(8)]]
    agents_str = [
        "weights_2/td_learned2_" + str(i) + ".npy" for i in range(1, 9)
    ]
    for agent, string in zip(agents[0], agents_str):
        agent.w = load(string)
    agents_str = [
        "weights_3/td_learned3_" + str(i) + ".npy" for i in range(1, 9)
    ]
    for agent, string in zip(agents[1], agents_str):
        agent.w = load(string)
    means = np.zeros(8 * 8).reshape(8, 8)
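    # round-robin: each weights_2 agent plays each weights_3 agent bttl_num times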
    for i in range(8):
        for j in range(8):
            r_list = np.zeros(bttl_num)
            for t in range(bttl_num):
                agent_s = (agents[0][i], agents[1][j])
                arr0, arr1 = (agent.init_red() for agent in agent_s)
                game.__init__()
                game.setRed(arr0)
                game.changeSide()
                game.setRed(arr1)
                game.changeSide()
                player = 0
                while not game.is_ended():
                    agent = agent_s[player]
                    states = game.after_states()
                    i_act = agent.get_act_afterstates(states)
                    game.on_action_number_received(i_act)
                    game.changeSide()

                    player = (player + 1) % 2
                if player == 1:
                    game.changeSide()
                result = game.checkResult()
                r = (1 if (result > 0) else (-1 if (result < 0) else 0))
                r_list[t] = r
            means[i][j] = r_list.mean()
    print(means)
    print("mean: ", means.mean())
Example #4
def learn():
    file_name = "td_9"
    seed = 91
    game = Geister2()
    mcagent = MCAgent(game, seed)
    opponent = RandomAgent(game, seed+1)
    env = VsEnv(opponent, game, seed)
    mcagent.learn(env, seed)
    # for k in range(6*7*3):
    #     for i in range(3):
    #         for j in range(7):
    #             print((mcagent.w[j+i*(6*7)+k*(6*7*3):6+j+i*(6*7)+k*(6*7*3)]
    #                   * 1000).round()*(1/1000))
    #         print("-----------")
    #     print("-------------------")
    np.save(file_name, mcagent.w)
    w_td = np.load(file_name+'.npy')
    print(w_td.shape)
Example #5
def battle():
    seed = 29
    bttl_num = 10
    game = Geister2()
    agents_str = [
        "weights/weights_13/reinforce_" + str(i) + "_theta.npy"
        for i in range(1, 9)
    ]
    agent_len = len(agents_str)
    agents = [REINFORCEAgent(game, seed + i) for i in range(agent_len)]
    for agent, string in zip(agents, agents_str):
        agent.theta = load(string)
    means = np.zeros((agent_len, agent_len))
    for i in range(len(agents)):
        for j in range(i, len(agents)):
            if i == j:
                continue
            r_list = np.zeros(bttl_num)
            for t in range(bttl_num):
                agent_s = (agents[i], agents[j])
                arr0, arr1 = (agent.init_red() for agent in agent_s)
                game.__init__()
                game.setRed(arr0)
                game.changeSide()
                game.setRed(arr1)
                game.changeSide()
                player = 0
                while not game.is_ended():
                    agent = agent_s[player]
                    states = game.after_states()
                    i_act = agent.get_act_afterstates(states)
                    game.on_action_number_received(i_act)
                    game.changeSide()

                    player = (player + 1) % 2
                if player == 1:
                    game.changeSide()
                result = game.checkResult()
                r = (1 if (result > 0) else (-1 if (result < 0) else 0))
                r_list[t] = r
            means[i][j] = r_list.mean()
            means[j][i] = -means[i][j]
    print(means)
Example #6
def learn():
    file_name = "weights/rfvsrnd6"
    seed = 103
    game = Geister2()
    agent = REINFORCEAgent(game, seed)
    agent.w = np.random.randn(agent.W_SIZE) * agent.alpha * 0.0001
    agent.theta = np.random.randn(agent.T_SIZE) * agent.beta * 0.0001
    opponent = RandomAgent(game, seed + 1)
    env = VsEnv(opponent, game, seed)
    # set up the profiler
    pr = cProfile.Profile()
    pr.enable()
    # profiled section begins
    agent.learn(env, seed)
    # stop profiling and output the results
    pr.disable()
    stats = pstats.Stats(pr)
    stats.sort_stats('cumtime')
    stats.print_stats()
    pr.dump_stats('profile.stats')
    # post-processing

    np.save(file_name + "_w", agent.w)
    np.save(file_name + "_theta", agent.theta)
Example #7
def test2():
    mcagent = MCAgent(Geister2())
    mcagent.w = np.load("td_4.npy")
    print(mcagent.init_red())
Example #8
def ranking_learn(game):
    # load ranking_learn's bookkeeping data
    with open(ranking_data_path, 'rt') as fin:
        cin = csv.reader(fin)
        datas = [row for row in cin if len(row) > 0]
        num_weights = int(datas[0][0])

    # load the ranking data
    ranking_path = []
    ranking_n = []
    ranking_r = []
    with open(rankings_path, 'rt') as fin:
        cin = csv.reader(fin)
        datas = [row for row in cin if len(row) > 0]
        ranking_path = [row[2] for row in datas]
        ranking_n = [int(row[3]) for row in datas]
        ranking_r = [float(row[4]) for row in datas]

    # if the ranking has too few entries, pad it with randomly initialized REINFORCE agents
    while len(ranking_path) < num_rankingagents:
        ranking_path.append(get_path_radom(game))  # append a path to a freshly created random agent
        ranking_n.append(0)
        ranking_r.append(0)
    # if the ranking has too many entries, report an error
    if len(ranking_path) > num_rankingagents:
        print("error. ranking_num is over num_rankingagents")
        # path_list.append(ranking_path.pop(0))  # move the oldest ranking entry back to path_list
        # del ranking_n[0]
        # del ranking_r[0]

    # load the weights of the ranked agents
    game = Geister2()
    train_is = rnd.sample(range(num_rankingagents), num_rankingagents // 2)
    test_is = [i for i in range(num_rankingagents) if i not in train_is]
    rank_agents = load_agents(ranking_path, game, None)
    train_agents = [rank_agents[i] for i in train_is]
    test_agents = [rank_agents[i] for i in test_is]

    # create the new agent
    agent = pick_agent(game)
    agent_path = weights_path + "/rankRF" + str(num_weights)
    # train the agent
    env = VsEnvs(train_agents, game, None)  # the opponent is picked at random, once only
    agent.learn(env, max_episodes=max_episodes)

    # replay against the current ranking (only test_agents have their stats updated);
    # if the criteria are met, add the agent to the ranking
    results = []
    for i in test_is:
        test_agent = rank_agents[i]
        # result is the agent's win rate
        result = battle(agent, test_agent, bttl_num=bttl_num, seed=None)
        results.append(result)
        # update the opponent's win rate
        r_opp = -result
        ranking_r[i] = (ranking_r[i] * ranking_n[i] + r_opp) / (ranking_n[i] +
                                                                1)
        ranking_n[i] += 1
    results = np.array(results)
    # if the criteria are met (mean r above the threshold and wins against a majority), add to the ranking
    if (results.mean() > threshold
            and len(np.where(results > 0)[0]) > num_rankingagents / 2):
        # ranking entry to remove (the test agent with the lowest win rate)
        dl_index = ranking_r.index(min([ranking_r[i] for i in test_is]))
        ranking_path[dl_index] = agent_path
        ranking_n[dl_index] = ranking_r[dl_index] = 0

    # save the agent's data
    np.save(agent_path + "_w", agent.w)
    np.save(agent_path + "_theta", agent.theta)
    num_weights += 1

    # write ranking_learn's bookkeeping data
    with open(ranking_data_path, 'wt') as fout:
        csvout = csv.writer(fout)
        datas = [[str(num_weights)]]
        csvout.writerows(datas)

    # write the ranking data
    datas = [[str(i + 1), "REINFORCEAgent", ranking_path[i], n, r]
             for i, n, r in zip(range(len(ranking_path)), ranking_n, ranking_r)
             ]
    with open(rankings_path, 'wt') as fout:
        csvout = csv.writer(fout)
        csvout.writerows(datas)
Example #9
    # write ranking_learn's bookkeeping data
    with open(ranking_data_path, 'wt') as fout:
        csvout = csv.writer(fout)
        datas = [[str(num_weights)]]
        csvout.writerows(datas)

    # write the ranking data
    datas = [[str(i + 1), "REINFORCEAgent", ranking_path[i], n, r]
             for i, n, r in zip(range(len(ranking_path)), ranking_n, ranking_r)
             ]
    with open(rankings_path, 'wt') as fout:
        csvout = csv.writer(fout)
        csvout.writerows(datas)


if __name__ == "__main__":
    game = Geister2()
    # set up the profiler
    pr = cProfile.Profile()
    pr.enable()
    # profiled section begins; run until interrupted (e.g. Ctrl-C)
    try:
        while (True):
            ranking_learn(game)
    finally:
        # stop profiling and output the results
        pr.disable()
        stats = pstats.Stats(pr)
        stats.sort_stats('cumtime')
        stats.print_stats()
        pr.dump_stats('profile.stats')
        # end of results output
Example #10
def cluster_learn():
    seed = 122
    file_name = "weights/weights_16/reinforce_"
    agents_len = 18
    max_episodes = 500 * (agents_len)
    plt_intvl = 50 * (agents_len)
    plt_bttl = 200
    linestyles = [':', '--', '-.']  # one linestyle per alpha  # linestyle=(0, (1, 0))
    plt_colors = ['m', 'r', 'g', 'c', 'b', 'y']  # one color per beta; m = magenta, c = cyan
    linestyle_avg = '-'
    plt_color_avg = 'k'
    alphas = [0.003, 0.005, 0.01]
    betas = [0.0005, 0.0001, 0.0003, 0.0005, 0.001, 0.0015]
    assert (len(linestyles) == len(alphas))
    assert (len(plt_colors) == len(betas))
    assert (len(alphas) * len(betas) == agents_len)

    game = Geister2()
    np.random.seed(seed)
    rnd = random.Random(seed)
    agents = [REINFORCEAgent(game, seed + i) for i in range(agents_len)]
    for i in range(len(alphas)):
        for j in range(len(betas)):
            agents[i + j * len(alphas)].alpha = alphas[i]
            agents[i + j * len(alphas)].beta = betas[j]
    # initialize any uninitialized weights to zero
    for agent in agents:
        if agent.w is None:
            agent.w = np.zeros(agent.W_SIZE)
        if agent.theta is None:
            agent.theta = np.zeros(agent.T_SIZE)

    episodes_x = []
    results_y = [[] for _ in range(agents_len)]
    avg_y = []
    rnd_agent = RandomAgent(game, seed * 2 + 1)
    env = VsEnv(agents[0], game, seed)
    for episode in range(max_episodes):
        # pick each learning agent once per episode (in random order)
        for i in rnd.sample(range(agents_len), agents_len):  # e.g. -> [2, 0, 1]
            # i = rnd.randrange(agents_len)  # pick the learning agent at random
            # # alternative: let the opponent cycle through every candidate once (random order)
            # for j in rnd.sample(range(agents_len), agents_len):
            j = rnd.randrange(agents_len)  # the opponent is picked at random, once only
            agent = agents[i]
            env._opponent = agents[j]
            agent.learn(env, max_episodes=1)
        # periodically plot results against the random agent
        if (episode + 1) % plt_intvl == 0:
            episodes_x.append(episode)
            plt.clf()
            opponent = rnd_agent
            env._opponent = opponent
            avgs = []
            for i in range(agents_len):
                agent = agents[i]
                theta = agent.theta
                r_list = np.zeros(plt_bttl)
                for bttl_i in range(plt_bttl):
                    afterstates = env.on_episode_begin(agent.init_red())
                    x = agent.get_x(afterstates)
                    a = agent.get_act(x, theta)
                    for t in range(300):
                        r, nafterstates = env.on_action_number_received(a)
                        if r != 0:
                            break
                        nx = agent.get_x(nafterstates)
                        na = agent.get_act(nx, theta)
                        x = nx
                        a = na
                    r_list[bttl_i] = r
                mean = r_list.mean()
                avgs.append(mean)
                results_y[i].append(mean)
                plt.figure(1)
                plt.title('Training...')
                plt.xlabel('Episode')
                plt.ylabel('Mean Results')
                x_list = np.array(episodes_x)
                y_list = np.array(results_y[i])
                plt.plot(x_list,
                         y_list,
                         linestyle=linestyles[i % len(alphas)],
                         c=plt_colors[i // len(alphas)],
                         label=str(i))
            avg_y.append(np.array(avgs).mean())
            plt.figure(1)
            plt.title('Training...')
            plt.xlabel('Episode')
            plt.ylabel('Mean Results')
            x_list = np.array(episodes_x)
            y_list = np.array(avg_y)
            plt.plot(x_list,
                     y_list,
                     linestyle=linestyle_avg,
                     c=plt_color_avg,
                     label=agents_len)
            plt.pause(0.01)  # pause a bit so that plots are updated
    plt.savefig(file_name + ".png")
    plt.show()
    for i in range(agents_len):
        np.save(file_name + str(i + 1) + "_w", agents[i].w)
        np.save(file_name + str(i + 1) + "_theta", agents[i].theta)
Example #11
 def setUp(self):
     game = Geister2()
     game.setRed(["E", "F", "G", "H"])
     game.changeSide()
     game.setRed(["E", "F", "G", "H"])
     self.game = game
Example #12
    def learn(self,
              env,
              seed=1,
              max_episodes=100000,
              draw_mode=False,
              draw_opp=None):
        alpha = self.alpha
        beta = self.beta
        # epsilon = self.epsilon
        # rnd = self._rnd
        assert (env.S_SIZE == self.S_SIZE)

        plt_intvl = 500
        plt_bttl = 50
        episodes_x = []
        results_y = []
        dlts_y = []
        dlts = []
        # loading saved weights
        # mcagent.w = np.load("td_4.npy")
        # initialize w with small normal random numbers
        # np.random.seed(seed)
        # if self.w is None:
        #     self.w = np.random.randn(self.W_SIZE)*alpha*0.1
        # if self.theta is None:
        #     self.theta = np.random.randn(self.T_SIZE)*beta*0.1
        w = self.w
        theta = self.theta
        if draw_mode:
            denv = VsEnv(draw_opp, game=Geister2(), seed=seed)
        for episode in range(max_episodes):
            afterstates = env.on_episode_begin(self.init_red())
            xs = self.get_x([env.get_state()])[0]
            x = self.get_x(afterstates)
            a = self.get_act(x, theta)

            xs_list = [xs]
            x_list = [x]
            xa_list = [x[a]]

            for t in range(self.MAX_T):
                r, nafterstates = env.on_action_number_received(a)
                if r != 0:
                    break
                nxs = self.get_x([env.get_state()])[0]
                nx = self.get_x(nafterstates)
                na = self.get_act(nx, theta)
                xs_list.append(nxs)
                x_list.append(nx)
                xa_list.append(nx[na])

                x = nx
                a = na
            for xa, x, xs in zip(xa_list, x_list, xs_list):
                # state-value estimate squashed to (-1, 1)
                q = 2 / (1 + np.exp(-np.dot(w, xs))) - 1
                dlt = r - q  # the reward prediction must not use the afterstate
                dlts.append(dlt**2)
                w += beta * dlt * xs
                hs = x.dot(theta)
                hs -= hs.max()  # to avoid overflow
                exps = np.exp(hs)
                pis = exps / exps.sum()
                theta += alpha * r * (xa - pis.dot(x))
                # annealing schedule
                # theta += alpha*(episode/max_episodes)*r*(xa - pis.dot(x))

            if draw_opp is None and draw_mode:
                print("not implemented")
                raise Exception
            if draw_mode and ((episode + 1) % plt_intvl == 0):
                dlts_y.append(np.array(dlts).mean())
                dlts = []
                if draw_opp is not None:
                    denv._opponent = draw_opp
                    r_sum = 0.0
                    for bttl_i in range(plt_bttl):
                        afterstates = denv.on_episode_begin(self.init_red())
                        x = self.get_x(afterstates)
                        a = self.get_act(x, theta)
                        for t in range(300):
                            r, nafterstates = denv.on_action_number_received(a)
                            if r != 0:
                                break
                            nx = self.get_x(nafterstates)
                            na = self.get_act(nx, theta)
                            x = nx
                            a = na
                        r_sum += r
                    results_y.append(r_sum / plt_bttl)
                episodes_x.append(episode)
                # first plot: results
                plt.figure(2)
                plt.title('Training...')
                plt.xlabel('Episode')
                plt.ylabel('Mean Results of Interval')
                plt.text(50, 0.5, "alpha=" + str(self.alpha))
                plt.text(50, 0.4, "beta=" + str(self.beta))
                x_list = np.array(episodes_x)
                y_list = np.array(results_y)
                plt.plot(x_list, y_list)
                plt.pause(0.0001)  # pause a bit so that plots are updated
                plt.clf()
                # second plot: prediction error Δv(s)^2
                plt.figure(1)
                plt.title('Training...')
                plt.xlabel('Episode')
                plt.ylabel('Mean Dlt v(s)^2')
                plt.text(50, 0.5, "alpha=" + str(self.alpha))
                plt.text(50, 0.4, "beta=" + str(self.beta))
                x_list = np.array(episodes_x)
                y_list = np.array(dlts_y)
                plt.plot(x_list, y_list)
                plt.pause(0.0001)  # pause a bit so that plots are updated
                plt.clf()

        # after training finishes
        if (draw_mode):
            # first plot: results
            plt.figure(2)
            plt.title('Training...')
            plt.xlabel('Episode')
            plt.ylabel('Mean Results of Interval')
            plt.text(50, 0.5, "alpha=" + str(self.alpha))
            plt.text(50, 0.4, "beta=" + str(self.beta))
            x_list = np.array(episodes_x)
            y_list = np.array(results_y)
            plt.plot(x_list, y_list)
            plt.show()
            # second plot: prediction error Δv(s)^2
            plt.figure(1)
            plt.title('Training...')
            plt.xlabel('Episode')
            plt.ylabel('Mean Dlt v(s)^2')
            plt.text(50, 0.5, "alpha=" + str(self.alpha))
            plt.text(50, 0.4, "beta=" + str(self.beta))
            x_list = np.array(episodes_x)
            y_list = np.array(dlts_y)
            plt.plot(x_list, y_list)
            plt.show()
Example #13
        game.changeSide()
        game.setRed(arr1)
        game.changeSide()
        player = 0
        while not game.is_ended():
            agent = agents[player]
            states = game.after_states()
            i_act = agent.get_act_afterstates(states)
            game.on_action_number_received(i_act)
            game.changeSide()

            player = (player + 1) % 2
        if player == 1:
            game.changeSide()
        result = game.checkResult()
        r = (1 if (result > 0) else (-1 if (result < 0) else 0))
        results[t] = r
    return results.mean()


if __name__ == "__main__":
    seed = 100
    geister = Geister2()
    agents1 = [load_agent("weights/rfvsrnd5", geister, seed)]
    agents2 = [
        load_agent(("weights/weights_13/reinforce_" + str(i)), geister, seed)
        for i in range(1, 10)
    ]
    results = battle2(agents1, agents2, 100)
    print(results)
Example #14
    def __init__(self, opponent, game=Geister2(), seed=0):
        self._opponent = opponent
        self._game = game
        self._seed = seed

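        # 126 state features: (36 board squares + 6 extra entries) x 3 planes;
        # the exact meaning of the 6 extra entries depends on the feature encoder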
        self.S_SIZE = (6 * 6 + 6) * 3
Example #15
def self_play(file_name, agent=None, max_train=50000):
    seed = 0
    max_episodes = max_train
    plt_intvl = max_episodes + 1  # effectively disables periodic plotting
    plt_bttl = 200
    linestyle = '-'  # corresponds to alpha  # linestyle=(0, (1, 0))
    plt_color = 'k'  # corresponds to beta  # m (magenta) and c (cyan) are also options
    alpha = 0.001
    beta = 0.0001

    game = Geister2()
    np.random.seed(seed)
    if agent is None:
        agent = REINFORCEAgent(game, seed)
        agent.w = np.zeros(agent.W_SIZE)
        agent.theta = np.zeros(agent.T_SIZE)
    agent.alpha = alpha
    agent.beta = beta

    episodes_x = []
    results_y = []
    rnd_agent = RandomAgent(game, seed*2+1)
    env = VsEnv(agent, game, seed)
    denv = VsEnv(rnd_agent, game, seed)
    for episode in range(max_episodes):
        agent.alpha = alpha  # * (1 - episode/max_episodes)
        agent.beta = beta   # * (1 - episode/max_episodes)
        agent.learn(env, max_episodes=1)
        # periodically plot results against the random agent
        if (episode) % plt_intvl == 0:
            episodes_x.append(episode)
            plt.clf()
            opponent = rnd_agent
            denv._opponent = opponent
            theta = agent.theta
            r_list = np.zeros(plt_bttl)
            for bttl_i in range(plt_bttl):
                afterstates = denv.on_episode_begin(agent.init_red())
                x = agent.get_x(afterstates)
                a = agent.get_act(x, theta)
                for t in range(300):
                    r, nafterstates = denv.on_action_number_received(a)
                    if r != 0:
                        break
                    nx = agent.get_x(nafterstates)
                    na = agent.get_act(nx, theta)
                    x = nx
                    a = na
                r_list[bttl_i] = r
            mean = r_list.mean()
            results_y.append(mean)
            plt.figure(1)
            plt.title('Training...')
            plt.xlabel('Episode')
            plt.ylabel('Mean Results')
            x_list = np.array(episodes_x)
            y_list = np.array(results_y)
            plt.plot(x_list, y_list,
                     linestyle=linestyle,
                     c=plt_color)
            plt.pause(0.01)  # pause a bit so that plots are updated
    plt.savefig(file_name + ".png")
    # modified for self-play
    np.save(file_name+"_w", agent.w)
    np.save(file_name+"_theta", agent.theta)

    # convert to numpy arrays and save the plot data
    np.save(file_name+"x_list", np.array(x_list))
    np.save(file_name+"results_y", np.array(results_y))
Example #16
 def __init__(self, opponents, game=Geister2(), seed=0):
     self._opponents = opponents
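     # pick the initial opponent at random; `rnd` is assumed to be a
     # module-level random.Random instance defined elsewhere in this module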
     opp = rnd.choice(opponents)
     return super().__init__(opponent=opp, game=game, seed=seed)
Example #17
def add_in_ranking(path_list):
    # load the ranking data
    ranking_path = []
    ranking_n = []
    ranking_r = []
    with open('rankings', 'rt') as fin:
        cin = csv.reader(fin)
        datas = [row for row in cin if len(row) > 0]
        ranking_path = [row[2] for row in datas]
        ranking_n = [int(row[3]) for row in datas]
        ranking_r = [float(row[4]) for row in datas]

    # if the ranking has too few entries, add agents from the candidate list
    while len(ranking_path) < agentsnum:
        ranking_path.append(path_list.pop(0))  # move the front of path_list into the ranking
        ranking_n.append(0)
        ranking_r.append(0)
    # if the ranking has too many entries, move the surplus back to the candidate list
    while len(ranking_path) > agentsnum:
        path_list.append(ranking_path.pop(0))  # move the front of the ranking back to the candidates
        del ranking_n[0]
        del ranking_r[0]

    # load the agents' weights
    game = Geister2()
    agents = load_agents(path_list, game, seed)
    rank_agents = load_agents(ranking_path, game, seed)

    # play matches and add each agent to the ranking if it meets the criteria
    for i in range(len(agents)):
        agent = agents[i]
        results = []
        # refresh to the latest ranking
        rank_agents = load_agents(ranking_path, game, seed)
        for j in range(len(rank_agents)):
            rank_agent = rank_agents[j]
            # result is the agent's win rate
            result = battle(agent, rank_agent, bttl_num=bttl_num, seed=seed)
            results.append(result)
            # update the opponent's win rate
            r_opp = -result
            ranking_r[j] = (ranking_r[j] * ranking_n[j] +
                            r_opp) / (ranking_n[j] + 1)
            ranking_n[j] += 1
        results = np.array(results)
        # if the criteria are not met, skip this agent
        if (results.mean() <= threshold
                or len(np.where(results > 0)[0]) <= agentsnum / 2):
            continue
        # criteria met (mean r above the threshold and wins against a majority): add to the ranking
        dl_index = ranking_r.index(min(ranking_r))  # ranking entry to remove
        ranking_path[dl_index] = path_list[i]
        ranking_n[dl_index] = ranking_r[dl_index] = 0

    # write the data
    datas = [[str(i + 1), "REINFORCEAgent", ranking_path[i], n, r]
             for i, n, r in zip(range(len(ranking_path)), ranking_n, ranking_r)
             ]
    with open('rankings', 'wt') as fout:
        csvout = csv.writer(fout)
        csvout.writerows(datas)