def exp_effect_of_action_restrict_for_time(depth=5, func_id=2):
    """Compare alpha-beta search time with and without action restriction.

    Plays a single game in which both sides actually move at random.  On
    every turn of the second player, ``alpha_beta_action`` is executed 50
    times with its last argument set to ``True`` and 50 times with it set
    to ``False``; the elapsed time of each batch is accumulated, and both
    totals are printed once the game is over.

    Args:
        depth: Forwarded to ``alpha_beta_action`` as its third argument
            (the search depth, going by the name).
        func_id: Passed to ``select_func`` to choose the evaluation
            function used to fill ``ev_table``.
    """
    trials = 50
    gamma = 100000  # large value: do not perform the threshold cut
    create_ev_table(ev_table, select_func(func_id))  # evaluation function is fixed
    state = State()
    restrict_time = 0.0
    no_restrict_time = 0.0

    while not state.is_done():
        if not state.is_first_player():
            # With the number of actions reduced.
            # perf_counter() is monotonic and high resolution, unlike
            # time.time(), so it is the right clock for elapsed intervals.
            start = time.perf_counter()
            for _ in range(trials):
                alpha_beta_action(state, gamma, depth, True)
            restrict_time += time.perf_counter() - start

            # Without the number of actions reduced.
            start = time.perf_counter()
            for _ in range(trials):
                alpha_beta_action(state, gamma, depth, False)
            no_restrict_time += time.perf_counter() - start

        # Both players move randomly; the searches above are timing-only.
        action = random_action(state)
        state = state.next(action)

    print("restrict:", restrict_time, "no_restrict:", no_restrict_time)
def exp_gamma_time(depth=5, func_id=2, seed=None):
    """Measure alpha-beta search time for 30 gamma values over one game.

    Both sides actually move at random.  On every turn of the second
    player, ``alpha_beta_action`` is run 100 times for each gamma value
    (starting at 0.0 and increased by 0.1 per slot); the elapsed time is
    added to the matching slot of the timing list, which is printed after
    each such turn.

    Args:
        depth: Forwarded to ``alpha_beta_action`` as its third argument.
        func_id: Passed to ``select_func`` to choose the evaluation
            function used to fill ``ev_table``.
        seed: Seed for ``random``.  When omitted, a fresh seed is drawn on
            each call.  (A ``random.random()`` default in the signature
            would be evaluated only once, at definition time, so every
            default call would silently reuse the same seed.)
    """
    if seed is None:
        seed = random.random()
    print("seed", seed)
    random.seed(seed)

    state = State()
    create_ev_table(ev_table, select_func(func_id))
    keep_gamma_time = [0] * 30
    trials = 100

    # Loop until the game ends.
    while not state.is_done():
        if state.is_first_player():
            action = random_action(state)
        else:
            gamma = 0.0
            for index in range(len(keep_gamma_time)):
                # perf_counter() is the monotonic clock meant for intervals.
                start = time.perf_counter()
                for _ in range(trials):
                    alpha_beta_action(state, gamma, depth, False)
                keep_gamma_time[index] += time.perf_counter() - start
                gamma += 0.1
            # Take a random action so that the sampled positions vary.
            action = random_action(state)
            print(keep_gamma_time)

        # Advance to the next state.
        state = state.next(action)
def exp_value_changing(depth=5, func_id=3, gamma=1.0, seed=None):
    """Record how the board evaluation changes over 100 games.

    In each game the first player moves with
    ``move_ordering_alpha_beta_action`` and the second player moves at
    random.  Before every first-player move the current state is copied
    into an ``AccessableState`` and evaluated with
    ``evaluate_board_state``; the value and the matching
    ``[state.pieces, state.enemy_pieces]`` pair are recorded.  The
    per-game lists are printed at the end.

    Args:
        depth: Forwarded to ``move_ordering_alpha_beta_action`` as its
            third argument.
        func_id: Currently unused by this function.
        gamma: Currently unused by this function.
        seed: Base seed; game ``i`` is seeded with ``seed * (i + 1)``.
            When omitted, a fresh base seed is drawn on each call rather
            than once at definition time.
    """
    if seed is None:
        seed = random.random()

    record_values = []  # evaluation values, one list per game
    record_boards = []  # boards recorded alongside the evaluation values

    for i in range(100):
        random.seed(seed * (i + 1))
        state = State()
        ii_state = AccessableState()
        values = []
        boards = []

        while not state.is_done():
            if state.is_first_player():
                # NOTE(review): the game index ``i`` (0..99) is passed as the
                # fourth argument here; confirm this is the intended value.
                action = move_ordering_alpha_beta_action(state, 1, depth, i)
                # Compute and record the evaluation of the current board.
                ii_state.create_ii_state_from_state(state)
                values.append(evaluate_board_state(ii_state))
                boards.append([state.pieces, state.enemy_pieces])
            else:
                action = random_action(state)
            state = state.next(action)

        # Fixed: these were misspelled ``apped`` and raised AttributeError.
        record_values.append(values)
        record_boards.append(boards)

    # TODO: write the results out as CSV.
    print(record_values)
    print(record_boards)
def playout(state):
    """Play random moves until the game ends and return the result.

    The result is from the point of view of the player to move in the
    given ``state``: ``-1`` if that player ends up losing, ``1`` if the
    opponent does, and ``0`` for a draw.
    """
    # ``sign`` is +1 while the player to move is the original one and -1
    # while it is the opponent; it replaces the negation that a recursive
    # formulation applies on every ply.
    sign = 1
    while True:
        if state.is_lose():
            return -sign
        if state.is_draw():
            return 0
        state = state.next(random_action(state))
        sign = -sign
def rollout(self, state):
    """Play random legal moves on ``state`` until it has a winner.

    ``state`` is mutated in place through ``make_move``.  Candidate moves
    come from ``random_action()`` and are re-drawn until
    ``state.legal_move`` accepts one.

    Returns:
        ``True`` if the winner equals the player that was to move when the
        rollout started, ``False`` otherwise.
    """
    starting_player = state.player
    while state.winner == 0:
        # Rejection sampling: keep drawing until the move is legal.
        move = random_action()
        while not state.legal_move(move):
            move = random_action()
        state.make_move(move)
    return starting_player == state.winner
def exp_move_ordering_time(depth=5, func_id=3, gamma=1.0, seed=None):
    """Time ``move_ordering_alpha_beta_action`` for each fourth-argument value.

    A single game is advanced with random moves.  At every position the
    search is run once for each ``i`` in ``range(depth + 1)`` (passed as
    the fourth argument) and the elapsed time is added to ``timer[i]``.
    The accumulated timings are printed when the game ends.

    Args:
        depth: Forwarded to ``move_ordering_alpha_beta_action`` as its
            third argument; also fixes the number of timed variants.
        func_id: Currently unused by this function.
        gamma: Currently unused by this function.
        seed: Seed for ``random``.  When omitted, a fresh seed is drawn on
            each call rather than once at definition time.
    """
    if seed is None:
        seed = random.random()
    print("seed", seed)

    timer = [0.0] * (depth + 1)
    random.seed(seed)
    state = State()

    while not state.is_done():
        for i in range(depth + 1):
            # perf_counter() is the monotonic clock meant for intervals.
            start = time.perf_counter()
            move_ordering_alpha_beta_action(state, 1, depth, i)
            timer[i] += time.perf_counter() - start

        # Advance the game with a random move.
        action = random_action(state)
        state = state.next(action)

    # Report the measurements; previously they were computed and discarded.
    print("timer", timer)
# else:
#     pass


if __name__ == "__main__":
    os.environ["OMP_NUM_THREADS"] = "1"
    with open("config.yaml") as f:
        args = yaml.safe_load(f)
        # print(args)

    # Write experiment code here.
    state = State()
    # Step through one random game, printing the legal actions each turn.
    # Stop when the game is over (the same ``is_done()`` check the other
    # game loops in this file use) instead of looping without an exit.
    while not state.is_done():
        print(state.legal_actions())
        state = state.next(random_action(state))

    # path = "models/10000.pth"
    # EvalHandyRL(100, path)
    # policies = obs_to_policy_to_use_game(agent, obs, state)
    # print(policies)
    # convert_state_to_obs(state)
    # test_predict()
    # test_cigeister()


# Fetch the policy.
# Connection part.
def main():
    """Run an interactive game client against a server on localhost.

    Reads a port number from standard input, connects over TCP, sends the
    ``SET:`` command naming the red pieces, and then repeatedly: builds a
    ``State`` from the confirmed information, picks an action with
    ``alpha_beta_action`` (falling back to ``random_action`` when the
    result is not a legal action), sends it, and reads the server's
    acknowledgement and the opponent's move.

    The loop ends when the server closes the connection, which
    ``socket.recv`` reports by returning an empty bytes object.

    Raises:
        ValueError: If the text entered for the port is not an integer.
        OSError: If the connection cannot be established or fails.
    """
    print(
        "Please input a port number that you want to connect. (10000 or 10001)"
    )
    # port = 10000  # first player
    # port = 10001  # second player
    port = int(input())

    # Create the client socket.
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        # Choose the server.
        s.connect(("localhost", port))
        # s.connect(("15.152.47.162", port))  # 1
        # s.connect(("13.208.76.157", port))  # 2
        # s.connect(("13.208.214.66", port))  # 3
        # s.connect(("13.208.184.19", port))  # 4

        # 2048 is the receive buffer size; read the server's first message.
        data = s.recv(2048)
        print(repr(data))

        # Set the red pieces: every piece whose colour code is 2.
        piece_char_list = ["A", "B", "C", "D", "E", "F", "G", "H"]
        confirmed_info = KeepConfirmedInfo()
        red_piece = "".join(
            piece_char_list[i]
            for i, color in enumerate(confirmed_info.my_pieces_color)
            if color == 2
        )
        send_str = "SET:" + red_piece + "\r\n"
        s.sendall(send_str.encode(encoding="utf-8"))

        data = s.recv(2048)
        print(repr(data))
        if data == b"OK \r\n":
            print("駒のセット成功")
        else:
            print("駒のセット失敗")

        # Most recently received TCP message, decoded to str.
        data = s.recv(2048)
        now_tcp_str = data.decode(encoding="utf-8")
        print(now_tcp_str)

        # Somewhat rule-based processing.
        # NOTE(review): ``rl_action`` is constructed but not used below.
        rl_action = HandyAction("models/108000.pth")
        turn_count = 0

        while 1:
            # Build a State from the information confirmed so far.
            state = confirmed_info.create_state()
            print(state)

            # Decide the action from the state.
            # NOTE(review): other call sites in this file pass four
            # arguments (state, gamma, depth, flag); confirm that
            # ``5, False`` lands on the intended parameters here.
            action = alpha_beta_action(state, 5, False)
            print(action)

            # If the action is invalid (e.g. 0), replace it for now.
            if action not in state.legal_actions():
                action = random_action(state)

            # Convert our action into bytes that can be passed to sendall.
            sendall_str_b = confirmed_info.action_to_sendall_str(action)
            print("送信データ(行動):", str(sendall_str_b))
            s.sendall(sendall_str_b)

            data = s.recv(2048)  # OK or OKR
            if not data:
                # Empty read: the server closed the connection.
                print("Connection closed by server.")
                break
            now_tcp_str = data.decode(encoding="utf-8")
            print("データ送信の可否:", now_tcp_str)

            # Update confirmed_info from our own action.
            confirmed_info.update_coo_from_my_aciton(action, now_tcp_str)

            # Opponent's action.
            data = s.recv(2048)
            if not data:
                print("Connection closed by server.")
                break
            now_tcp_str = data.decode(encoding="utf-8")
            print("相手の行動:", now_tcp_str)

            # Update confirmed_info from the opponent's action.
            # confirmed_info.update_coo_from_enemy_aciton(now_tcp_str)
            # The incremental update sometimes misbehaves, so reset from the
            # received message every time (temporary workaround).
            confirmed_info.reset_keep_confirmed_info(now_tcp_str)

            # Two turns (ours and the opponent's) have passed.
            turn_count += 2