def opponent(self):
    """Return a fresh piece object for the opposing side.

    The wrapped ``self.mark`` reports its opponent as an integer code,
    which is mapped to a new piece instance:

    *  1 -> ``Maru()``
    *  0 -> ``Empty()``
    * -1 -> ``Batsu()``

    Returns:
        A new ``Maru``, ``Empty`` or ``Batsu`` instance, or ``None`` when
        the reported code is none of the three values above.
    """
    # Ask the wrapped mark once; the original re-queried it for every branch.
    code = self.mark.opponent()
    if code == 1:
        return Maru()
    if code == 0:
        return Empty()
    if code == -1:
        return Batsu()
    # Unknown code: keep the original implicit fall-through result.
    return None
def is_draw(self):
    """Tell whether the board is a draw.

    Returns:
        True when no valid action remains and neither Maru nor Batsu has
        won; False otherwise (moves remain, or one side has won).
    """
    # Moves still available: the game cannot be a draw yet.
    if len(self.get_valid_actions()) != 0:
        return False
    # Board is full: it is a draw only if nobody has a winning line.
    if self.is_win(Mark(Maru())):
        return False
    return not self.is_win(Mark(Batsu()))
def is_end(self):
    """Tell whether the game is over for this board.

    Returns:
        True when Maru has won, Batsu has won, or no valid action
        remains; False otherwise (the game is still undecided).
    """
    # Check the winners in the same order as before: Maru first, then Batsu.
    for piece_type in (Maru, Batsu):
        if self.is_win(Mark(piece_type())):
            return True
    # Nobody has won: the game ends only when the board has no moves left.
    return len(self.get_valid_actions()) == 0
# 勝者の報酬 current_player.learn(1, True) # 敗者の報酬 self.players[result.opponent().to_int()].learn(-1, True) if verbose: print("%s" % (state.to_array())) print("-" * 5) state.output() print("-" * 5) print("%s win!!!" % (self.players[current_player_mark].mark.to_string())) break elif state.is_draw(): result = Mark(Empty()) for player in self.players.itervalues(): player.learn(0, True) if verbose: state.output() print("draw.") break current_player_mark = self.players[ current_player_mark].mark.opponent().to_int() #print("="*30) if __name__ == '__main__': player1 = Player(Mark(Maru())) player2 = Player(Mark(Batsu())) game = Game(player1, player2) game.start(verbose=True)
# Ad-hoc checks of the State API. `state1` and `new_state` are created
# before this point (not shown here). Each print uses the single-argument
# print(...) form, which prints the same on Python 2 and parses on Python 3.
print(new_state.get(0).to_int())
print(state1.to_array())
print(new_state.to_array())
print(state1.win_state)
print(state1.is_win(Mark(Maru())))

# Two independently built Maru marks should report the same integer code.
maru1 = Mark(Maru())
maru2 = Mark(Maru())
print(maru1.to_int() == maru2.to_int())

# Add Maru at cells 1 and 2 on top of new_state, then query win/draw/end.
# NOTE(review): presumably new_state already holds Maru at cell 0, which
# would make new_state3 a winning row -- confirm against the lines above.
new_state2 = new_state.set(1, Mark(Maru()))
new_state3 = new_state2.set(2, Mark(Maru()))
print(new_state3.to_array())
print(new_state3.is_win(Mark(Maru())))
print(new_state2.is_draw())
print(new_state2.is_end())
print(new_state3.is_end())

# Fill all nine cells (0..8) starting again from state1, then check is_draw().
new_state = state1.set(0, Mark(Batsu()))
new_state2 = new_state.set(1, Mark(Maru()))
new_state4 = new_state2.set(2, Mark(Batsu()))
new_state5 = new_state4.set(3, Mark(Maru()))
new_state6 = new_state5.set(4, Mark(Batsu()))
new_state7 = new_state6.set(5, Mark(Batsu()))
new_state8 = new_state7.set(6, Mark(Maru()))
new_state9 = new_state8.set(7, Mark(Batsu()))
new_state10 = new_state9.set(8, Mark(Maru()))
print(new_state10.to_array())
print(new_state10.is_draw())
new_state10.output()

state11 = State()
state12 = State()
状態 報酬 事後状態から行動を行った後の状態 ''' if next_state is None: next_state_value = 0.0 else: #状態価値テーブルから価値を取得 next_state_value = self.value[next_state] # 行動価値の更新式 self.value[state] += self.step_size * (reward + next_state_value - self.value[state]) #print("value[state] %f", self.value[state]) if __name__ == '__main__': value = Value() state = State() #print value.get_value(state) #print value.get_max_action(state, Mark(Maru())) new_state = state.set(3, Mark(Maru())) new_state1 = state.set(4, Mark(Maru())) new_state2 = new_state.set(2, Mark(Batsu())) value.update(state, 10, new_state) value.update(new_state, 10, new_state2) value.update(new_state, 10, new_state2) value.update(new_state1, 10, new_state2) value.update(new_state1, 100, new_state2) #value.update(state, -3, new_state2) print value.get_max_action(state, Mark(Maru()))
from maru_mark import Maru
from batsu_mark import Batsu
from empty_mark import Empty
from tic_tac_toe_state import State
from tic_tac_toe_game import Game

# Action-value table shared by both learning agents.
value = Value()
print("Sarsa method:")
print("Training com1 and com2.")
com_1 = SarsaCom(Mark(Maru()), value)
com_2 = SarsaCom(Mark(Batsu()), value)

# Number of self-play games. A digit string overrides the default; an empty
# line keeps it. Previously the default was unreachable because the loop
# only exited on a digit string.
iterations = 10000
print("Input the number of iterations (%d):" % (iterations))
while True:
    input_line = raw_input()
    if input_line == '':
        # Keep the default number of iterations.
        break
    if input_line.isdigit():
        iterations = int(input_line)
        break
    print("Input number:")

# Self-play training: a fresh Game per iteration, the same two agents.
for i in xrange(iterations):
    game = Game(com_1, com_2)
    game.start()
#encoding: utf-8 ''' tic tac toe のSarsa λ実行クラス ''' from tic_tac_toe_sarsa_r_com import SarsaRCom from tic_tac_toe_nn_com import SarsaNNCom from mark import Mark from maru_mark import Maru from batsu_mark import Batsu from tic_tac_toe_game import Game import dill com_1 = SarsaNNCom(Mark(Maru()), 0.1, 0.1, 0.6) com_2 = SarsaRCom(Mark(Batsu()), 0.1, 0.1, 0.6) iterations = 100000 print("Input the number of iterations (%d):" % (iterations)) while(True): input_line = raw_input() if input_line.isdigit(): iterations = int(input_line) break elif input_line == '': break else: print("Input number:") # 学習 for i in xrange(iterations):
# Load the trained models.
# NOTE(review): dill.load unpickles arbitrary objects and can execute code
# while doing so; only load model files that were produced locally/trusted.
with open('tic_tac_toe_com_1_sarsa_r.pkl', 'rb') as f:
    com_1 = dill.load(f)
with open('tic_tac_toe_com_2_sarsa_r.pkl', 'rb') as f:
    com_2 = dill.load(f)

# Interactive menu: keep offering matches until the user enters 'q'.
while True:
    print("Select a type of fight [1, 2, 3, q]")
    print("1: human vs com2")
    print("2: com1 vs human")
    print("3: com1 vs com2")
    print("q: quit")
    input_line = raw_input()
    if input_line == 'q':
        break
    if not input_line.isdigit():
        # Not a number and not 'q': show the menu again.
        continue
    type_of_fight = int(input_line)

    if type_of_fight == 1:
        game = Game(Player(Mark(Maru())), com_2)
    elif type_of_fight == 2:
        game = Game(com_1, Player(Mark(Batsu())))
    elif type_of_fight == 3:
        game = Game(com_1, com_2)
    else:
        # A number outside 1-3 used to fall through to game.start() with
        # `game` undefined (first pass) or left over from the previous
        # match; show the menu again instead.
        continue
    game.start(True)
# NOTE(review): the three functions below take `self` and read `self.mark`;
# presumably they are methods of the Mark wrapper class whose header lies
# outside this excerpt -- confirm the enclosing indentation when merging.
def opponent(self):
    """Return a fresh piece object for the opposing side.

    ``self.mark.opponent()`` reports an integer code which is mapped to a
    new piece instance: 1 -> ``Maru()``, 0 -> ``Empty()``, -1 -> ``Batsu()``.

    Returns:
        A new ``Maru``, ``Empty`` or ``Batsu`` instance, or ``None`` when
        the reported code is none of those three values.
    """
    # Ask the wrapped mark once; the original re-queried it for every branch.
    code = self.mark.opponent()
    if code == 1:
        return Maru()
    if code == 0:
        return Empty()
    if code == -1:
        return Batsu()
    # Unknown code: keep the original implicit fall-through result.
    return None


def to_int(self):
    """Return the wrapped mark's integer code (delegates to ``self.mark``)."""
    return self.mark.to_int()


def to_string(self):
    """Return the wrapped mark's string form (delegates to ``self.mark``)."""
    return self.mark.to_string()


if __name__ == '__main__':
    # Manual smoke test: print each mark's string form, integer code and
    # the string form of its opponent. Single-argument print(...) prints
    # the same on Python 2 and also parses on Python 3.
    maru = Mark(Maru())
    print(maru.to_string())
    print(maru.to_int())
    print(maru.opponent().to_string())
    print('=' * 10)

    batsu = Mark(Batsu())
    print(batsu.to_string())
    print(batsu.to_int())
    print(batsu.opponent().to_string())
    print('=' * 10)

    empty = Mark(Empty())
    print(empty.to_string())
    print(empty.to_int())
    print(empty.opponent().to_string())