# Self-play worker: unpack the model and learndata from input.tar.gz, play a batch of
# games, then archive the updated learndata as output.tar.gz.
def main():
    import argparse
    parser = argparse.ArgumentParser(
        "Play the Gomoku Game!", formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('-g', '--n_games', type=int, default=100, help='Number of games to play.')
    args = parser.parse_args()

    game = Gomoku(board_size=15, first_center=False)

    # unpack the model checkpoint and the learndata shipped with this job
    import tarfile
    with tarfile.open('input.tar.gz') as tar:
        tar.extractall()

    import construct_dnn
    model = construct_dnn.construct_dnn()
    model.load('tf_model/tf_model')

    # both players share the same value-prediction model
    import player_A, player_B
    player_A.tf_predict_u.model = player_B.tf_predict_u.model = model
    player_A.initialize()
    player_B.initialize()

    p1 = Player('Black')
    p1.strategy = player_A.strategy
    p2 = Player('White')
    p2.strategy = player_B.strategy
    p1.strategy.learndata = pickle.load(open('black.learndata', 'rb'))
    p2.strategy.learndata = pickle.load(open('white.learndata', 'rb'))

    game.players = [p1, p2]
    if args.n_games > 1:
        game.fastmode = 2

    def playone(i, game_output, winner_board):
        game.reset()
        winner = game.play()
        winner_board[winner] += 1
        game_output.write('Game %-4d: Winner is %s\n' % (i + 1, winner))
        game_output.flush()

    winner_board = collections.OrderedDict([(p.name, 0) for p in game.players])
    winner_board["Draw"] = 0
    with open('game_results.txt', 'w') as game_output:
        for i_game in range(args.n_games):
            playone(i_game, game_output, winner_board)

    print("Name | Games Won")
    for name, nwin in winner_board.items():
        print("%-7s | %7d" % (name, nwin))

    # save the learndata to disk and pack it for the next job
    pickle.dump(p1.strategy.learndata, open('newblack.learndata', 'wb'))
    pickle.dump(p2.strategy.learndata, open('newwhite.learndata', 'wb'))
    print('%d black learndata and %d white learndata saved!' %
          (len(p1.strategy.learndata), len(p2.strategy.learndata)))
    with tarfile.open('output.tar.gz', 'w:gz') as tar:
        tar.add('newblack.learndata')
        tar.add('newwhite.learndata')
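# Illustrative only (not part of the original scripts): a minimal sketch of how the
# input.tar.gz expected above could be bootstrapped for a first run. It assumes the
# archive needs empty black/white learndata pickles plus an existing 'tf_model/'
# checkpoint directory; the project's real packaging step is not shown in this code.
def make_input_archive():
    import pickle, tarfile
    for name in ('black.learndata', 'white.learndata'):
        with open(name, 'wb') as f:
            pickle.dump({}, f)  # start from empty learndata dicts
    with tarfile.open('input.tar.gz', 'w:gz') as tar:
        tar.add('black.learndata')
        tar.add('white.learndata')
        tar.add('tf_model')  # directory holding the saved model checkpoint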
# Benchmark script: play the TF-model player against the reference ai_bench AI and
# tally the results over a number of games.
def main():
    import argparse
    parser = argparse.ArgumentParser(
        "Play the Gomoku Game!", formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('-n', '--n_game', type=int, default=10,
                        help='Play a number of games to gather statistics.')
    args = parser.parse_args()

    game = Gomoku(board_size=15, first_center=False)

    import construct_dnn
    model = construct_dnn.construct_dnn()
    model.load('bench_model/tf_model')
    #model.load('trained_model_001/trained_model_001')
    #model.load('trained_model_002/trained_model_002')
    #model.load('trained_model_003/trained_model_003')
    #model.load('trained_model_007/trained_model_007')

    import player_A
    player_A.tf_predict_u.model = model
    player_A.initialize()
    p1 = Player('TF')
    p1.strategy = player_A.strategy
    player_A.estimate_level = 1
    player_A.t_random = 0.01

    import ai_bench
    ai_bench.initialize()
    p2 = Player('BenchAI')
    p2.strategy = ai_bench.strategy
    ai_bench.estimate_level = 4
    ai_bench.t_random = 0.001

    game.players = [p2, p1]
    if args.n_game > 1:
        game.fastmode = 2
    else:
        player_A.show_q = ai_bench.show_q = True

    def playone(i, game_output, winner_board):
        game.reset()
        winner = game.play()
        winner_board[winner] += 1
        game_output.write('Game %-4d: Winner is %s\n' % (i + 1, winner))
        game_output.flush()

    winner_board = collections.OrderedDict([(p.name, 0) for p in game.players])
    winner_board["Draw"] = 0
    with open('game_results.txt', 'w') as game_output:
        for i_game in range(args.n_game):
            playone(i_game, game_output, winner_board)
            # the learndata is not used when benchmarking; discard it to free memory
            p1.strategy.learndata = dict()

    print("Name | Games Won")
    for name, nwin in winner_board.items():
        print("%-7s | %7d" % (name, nwin))
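# Illustrative only (not part of the original script): a summary one could print after
# the winner table above, giving the TF player's win rate against the bench AI with
# draws excluded.
decided = winner_board['TF'] + winner_board['BenchAI']
if decided > 0:
    print("TF win rate vs BenchAI (draws excluded): %.1f%%" %
          (100.0 * winner_board['TF'] / decided))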
# Training loop (tflearn): alternate between a batch of self-play games and fitting the
# value model, saving a new model after each iteration.
def main():
    import argparse
    parser = argparse.ArgumentParser(
        "Play the Gomoku Game!", formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('-n', '--n_train', type=int, default=10,
                        help='Number of training iterations.')
    parser.add_argument('-t', '--train_step', type=int, default=100,
                        help='Train a new model after this number of games.')
    parser.add_argument('-e', '--n_epoch', type=int, default=100,
                        help='Number of epochs for each training model.')
    args = parser.parse_args()

    game = Gomoku(board_size=15, first_center=False)

    import construct_dnn
    model = construct_dnn.construct_dnn()
    model.load('initial_model/tf_model')

    import player_A, player_B
    player_A.tf_predict_u.model = player_B.tf_predict_u.model = model
    player_A.initialize()
    player_B.initialize()

    p1 = Player('Black')
    p1.strategy = player_A.strategy
    p2 = Player('White')
    p2.strategy = player_B.strategy
    # set up linked learndata and cache (allow AI to look into opponent's data)
    p1.strategy.learndata = p2.strategy.opponent_learndata = dict()
    p2.strategy.learndata = p1.strategy.opponent_learndata = dict()
    player_A.tf_predict_u.cache = player_B.tf_predict_u.cache = dict()

    game.players = [p1, p2]
    if args.train_step > 1:
        game.fastmode = 2
    else:
        player_A.show_q = player_B.show_q = True

    def playone(i, game_output, winner_board):
        game.reset()
        winner = game.play()
        winner_board[winner] += 1
        game_output.write('Game %-4d: Winner is %s\n' % (i + 1, winner))
        game_output.flush()

    print("Training the model for %d iterations." % args.n_train)
    for i_train in range(args.n_train):
        # check if the current model exists
        model_name = "trained_model_%03d" % i_train
        assert not os.path.exists(model_name), "Current model %s already exists!" % model_name
        # create and enter the model folder
        os.mkdir(model_name)
        os.chdir(model_name)
        # play the games
        print("Training model %s" % model_name)
        winner_board = collections.OrderedDict([(p.name, 0) for p in game.players])
        winner_board["Draw"] = 0
        with open('game_results.txt', 'w') as game_output:
            for i_game in range(args.train_step):
                playone(i_game, game_output, winner_board)
        print("Name | Games Won")
        for name, nwin in winner_board.items():
            print("%-7s | %7d" % (name, nwin))
        # collect training data
        train_X, train_Y = prepare_train_data(p1.strategy.learndata, p2.strategy.learndata)
        # reset the learndata and cache, release memory
        p1.strategy.learndata = p2.strategy.opponent_learndata = dict()
        p2.strategy.learndata = p1.strategy.opponent_learndata = dict()
        player_A.tf_predict_u.cache = player_B.tf_predict_u.cache = dict()
        # fit the tf model
        model.fit(train_X, train_Y, n_epoch=args.n_epoch, validation_set=0.1,
                  shuffle=True, show_metric=True)
        model.save('tf_model')
        print("Model %s saved!" % model_name)
        os.chdir('..')
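# Illustrative only: prepare_train_data() is defined elsewhere in the project. A rough
# sketch of its assumed contract, based solely on how it is called above: flatten both
# learndata dicts into (X, Y) numpy arrays for model.fit(). The per-entry layout
# (here assumed to be key -> (board_state, value)) is a guess, not the actual format.
import numpy as np

def prepare_train_data_sketch(black_learndata, white_learndata):
    xs, ys = [], []
    for learndata in (black_learndata, white_learndata):
        for _key, (board_state, value) in learndata.items():
            xs.append(board_state)
            ys.append(value)
    return np.array(xs), np.array(ys)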
# Training loop with randomized opening boards: openings can be drawn from a begin-board
# library, and --new starts from an untrained model instead of initial_model.
def main():
    import argparse
    parser = argparse.ArgumentParser(
        "Play the Gomoku Game!", formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('-n', '--n_train', type=int, default=10,
                        help='Number of training iterations.')
    parser.add_argument('-t', '--train_step', type=int, default=100,
                        help='Train a new model after this number of games.')
    parser.add_argument('-e', '--n_epoch', type=int, default=100,
                        help='Number of epochs for each training model.')
    parser.add_argument('-l', '--begin_lib', help='Begin board library file')
    parser.add_argument('-p', '--begin_lib_p', type=float, default=1.0,
                        help='Probability of using the begin board library')
    parser.add_argument('--new', action='store_true', help='Start from new model')
    args = parser.parse_args()

    game = Gomoku(board_size=15, first_center=False)

    import construct_dnn
    model = construct_dnn.construct_dnn()
    if not args.new:
        model.load('initial_model/tf_model')

    import player_A, player_B
    player_A.tf_predict_u.model = player_B.tf_predict_u.model = model
    player_A.initialize()
    player_B.initialize()

    p1 = Player('Black')
    p1.strategy = player_A.strategy
    p2 = Player('White')
    p2.strategy = player_B.strategy
    # set up linked learndata and cache (allow AI to look into opponent's data)
    p1.strategy.learndata = p2.strategy.opponent_learndata = dict()
    p2.strategy.learndata = p1.strategy.opponent_learndata = dict()
    player_A.tf_predict_u.cache = player_B.tf_predict_u.cache = dict()

    game.players = [p1, p2]
    if args.train_step > 1:
        game.fastmode = 2
    else:
        player_A.show_q = player_B.show_q = True

    allstones = set([(r, c) for r in range(1, 16) for c in range(1, 16)])
    if args.begin_lib is not None:
        begin_lib = __import__(args.begin_lib).begin_lib
    else:
        begin_lib = None

    def playone(i, game_output, winner_board):
        game.reset()
        player_A.reset()
        player_B.reset()
        if random.random() < args.begin_lib_p:
            game.board = gen_begin_board(allstones, begin_lib)
        else:
            game.board = gen_begin_board(allstones, None)
        # randomly assign a black stone to be the last move
        game.last_move = next(iter(game.board[0]))
        winner = game.play()
        winner_board[winner] += 1
        game_output.write('Game %-4d: Winner is %s\n' % (i + 1, winner))
        game_output.flush()

    print("Training the model for %d iterations." % args.n_train)
    for i_train in range(args.n_train):
        # check if the current model exists
        model_name = "trained_model_%03d" % i_train
        assert not os.path.exists(model_name), "Current model %s already exists!" % model_name
        # create and enter the model folder
        os.mkdir(model_name)
        os.chdir(model_name)
        # play the games
        print("Training model %s" % model_name)
        winner_board = collections.OrderedDict([(p.name, 0) for p in game.players])
        winner_board["Draw"] = 0
        with open('game_results.txt', 'w') as game_output:
            for i_game in range(args.train_step):
                playone(i_game, game_output, winner_board)
        print("Name | Games Won")
        for name, nwin in winner_board.items():
            print("%-7s | %7d" % (name, nwin))
        # collect training data
        train_X, train_Y = prepare_train_data(p1.strategy.learndata, p2.strategy.learndata)
        # reset the learndata and cache, release memory
        p1.strategy.learndata = p2.strategy.opponent_learndata = dict()
        p2.strategy.learndata = p1.strategy.opponent_learndata = dict()
        player_A.tf_predict_u.cache = player_B.tf_predict_u.cache = dict()
        # fit the tf model
        #model.trainer.training_state.step = 0  # reset the training step so lr_decay is reset
        model.fit(train_X, train_Y, n_epoch=args.n_epoch, validation_set=0.1,
                  shuffle=True, show_metric=True)
        model.save('tf_model')
        print("Model %s saved!" % model_name)
        os.chdir('..')
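# Illustrative only: gen_begin_board() is defined elsewhere. A guess at its behavior,
# based on how it is called above: return a (black_stones, white_stones) opening board,
# either drawn from the supplied begin_lib (assumed to be a collection of such openings)
# or built from a few random stones when begin_lib is None. Black gets one extra stone
# since the caller marks a black stone as the last move.
import random

def gen_begin_board_sketch(allstones, begin_lib, n_pairs=2):
    if begin_lib is not None:
        black, white = random.choice(begin_lib)
        return (set(black), set(white))
    stones = random.sample(sorted(allstones), 2 * n_pairs + 1)
    return (set(stones[:n_pairs + 1]), set(stones[n_pairs + 1:]))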
# Keras (tf_model) version of the training loop: it can resume an interrupted run from
# the last trained_model_* folder and replays opening boards that surprised either player.
def main():
    import argparse
    parser = argparse.ArgumentParser(
        "Play the Gomoku Game!", formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('-n', '--n_train', type=int, default=10,
                        help='Number of training iterations.')
    parser.add_argument('-t', '--train_step', type=int, default=100,
                        help='Train a new model after this number of games.')
    parser.add_argument('-e', '--n_epoch', type=int, default=100,
                        help='Number of epochs for each training model.')
    parser.add_argument('-l', '--begin_lib', help='Begin board library file')
    parser.add_argument('-p', '--begin_lib_p', type=float, default=1.0,
                        help='Probability of using the begin board library')
    parser.add_argument('--new', action='store_true', help='Start from new model')
    args = parser.parse_args()

    game = Gomoku(board_size=15, first_center=False)

    from tf_model import get_new_model, load_existing_model, save_model
    if args.new:
        model = get_new_model()
    else:
        model = load_existing_model('initial_model/tf_model.h5')

    import player_A, player_B
    player_A.tf_predict_u.model = player_B.tf_predict_u.model = model
    player_A.initialize()
    player_B.initialize()

    p1 = Player('Black')
    p1.strategy = player_A.strategy
    p2 = Player('White')
    p2.strategy = player_B.strategy
    # set up linked learndata and cache (allow AI to look into opponent's data)
    p1.strategy.learndata = p2.strategy.opponent_learndata = dict()
    p2.strategy.learndata = p1.strategy.opponent_learndata = dict()
    player_A.tf_predict_u.cache = player_B.tf_predict_u.cache = dict()

    game.players = [p1, p2]
    if args.train_step > 1:
        game.fastmode = 2
    else:
        player_A.show_q = player_B.show_q = True

    allstones = set([(r, c) for r in range(1, 16) for c in range(1, 16)])
    if args.begin_lib is not None:
        begin_lib = __import__(args.begin_lib).begin_lib
    else:
        begin_lib = None

    def playone(i, game_output, winner_board, replay=False):
        game.reset()
        player_A.reset()
        player_B.reset()
        if replay:
            game.board = copy.deepcopy(game.last_begin_board)
        else:
            if random.random() < args.begin_lib_p:
                game.board = gen_begin_board(allstones, begin_lib)
            else:
                game.board = gen_begin_board(allstones, None)
            # store the begin board
            game.last_begin_board = copy.deepcopy(game.board)
        # randomly assign a black stone to be the last move
        game.last_move = next(iter(game.board[0]))
        winner = game.play()
        winner_board[winner] += 1
        game_output.write('Game %-4d: Winner is %s\n' % (i + 1, winner))
        game_output.flush()

    print("Training the model for %d iterations." % args.n_train)

    # find the last existing trained_model folder so an interrupted run can be resumed
    last_i_train = -1
    for i_train in range(args.n_train):
        model_name = "trained_model_%03d" % i_train
        if os.path.exists(model_name):
            print(f"Folder {model_name} exists")
            last_i_train = i_train
        else:
            break

    if last_i_train >= 0:
        # try to load the last trained model
        model_name = "trained_model_%03d" % last_i_train
        model_fnm = os.path.join(model_name, 'tf_model.h5')
        if os.path.exists(model_fnm):
            model = load_existing_model(model_fnm)
            print(f"Loaded trained model from {model_fnm}")
        else:
            # if the last trained model does not exist, load the previous one
            if last_i_train > 0:
                prev_model_name = f"trained_model_{last_i_train-1:03d}"
                prev_model_fnm = os.path.join(prev_model_name, 'tf_model.h5')
                model = load_existing_model(prev_model_fnm)
                print(f"Loaded latest model from {prev_model_fnm}")
            # try to reuse data and start training
            train_data_fnm = os.path.join(model_name, 'data.h5')
            if os.path.exists(train_data_fnm):
                train_X, train_Y, train_W = load_data_h5(train_data_fnm)
                print(f"Training data loaded from {train_data_fnm}, start training")
                model.fit(train_X, train_Y, epochs=args.n_epoch, validation_split=0.2,
                          shuffle=True, sample_weight=train_W)
                save_model(model, model_fnm)
                print("Model %s saved!" % model_name)
            else:
                # delete this folder and start again
                shutil.rmtree(model_name)
                print(f"Deleting folder {model_name}")
                last_i_train -= 1

    for i_train in range(last_i_train + 1, args.n_train):
        model_name = "trained_model_%03d" % i_train
        # create and enter the model folder
        os.mkdir(model_name)
        os.chdir(model_name)
        # play the games
        print("Training model %s" % model_name)
        winner_board = collections.OrderedDict([(p.name, 0) for p in game.players])
        winner_board["Draw"] = 0
        with open('game_results.txt', 'w') as game_output:
            replay_last_game = False
            for i_game in range(args.train_step):
                if replay_last_game:
                    print("Repeating the starting board of last game")
                playone(i_game, game_output, winner_board, replay=replay_last_game)
                replay_last_game = any(player.strategy.surprised for player in game.players)
        print("Name | Games Won")
        for name, nwin in winner_board.items():
            print("%-7s | %7d" % (name, nwin))
        # collect training data
        train_X, train_Y, train_W = prepare_train_data(p1.strategy.learndata, p2.strategy.learndata)
        # reset the learndata and cache, release memory
        p1.strategy.learndata = p2.strategy.opponent_learndata = dict()
        p2.strategy.learndata = p1.strategy.opponent_learndata = dict()
        player_A.tf_predict_u.cache = player_B.tf_predict_u.cache = dict()
        # fit the tf model
        model.fit(train_X, train_Y, epochs=args.n_epoch, validation_split=0.2,
                  shuffle=True, sample_weight=train_W)
        save_model(model, 'tf_model.h5')
        print("Model %s saved!" % model_name)
        os.chdir('..')
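# Illustrative only: the tf_model module imported above is provided elsewhere in the
# project. A minimal tf.keras sketch of the helpers it is assumed to expose; the network
# here (a small conv net mapping a 15x15x1 board to one value) and the data.h5 layout
# (datasets 'X', 'Y', 'W') are assumptions, not the project's actual definitions.
import tensorflow as tf

def get_new_model():
    model = tf.keras.Sequential([
        tf.keras.layers.Conv2D(32, 3, padding='same', activation='relu',
                               input_shape=(15, 15, 1)),
        tf.keras.layers.Conv2D(32, 3, padding='same', activation='relu'),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dense(1, activation='tanh'),  # predicted value of the position
    ])
    model.compile(optimizer='adam', loss='mse')
    return model

def load_existing_model(path):
    return tf.keras.models.load_model(path)

def save_model(model, path):
    model.save(path)

def load_data_h5(path):
    import h5py
    with h5py.File(path, 'r') as f:
        return f['X'][:], f['Y'][:], f['W'][:]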
# Debug snippet: parse a dumped board state from a text file and query player_A's
# evaluation directly.
import numpy as np
import player_A

def read_state(fname):
    # each row: a 4-character prefix, then space-separated cells mapped '-'->0, 'x'->1, 'o'->-1
    state = []
    with open(fname) as f:
        for line in f:
            s = line[4:].strip()
            s = s.replace('-', '0')
            s = s.replace('x', '1')
            s = s.replace('o', '-1')
            state.append(s.split())
    print(state)
    return np.array(state, dtype=np.int8)

state = read_state('state_debug.txt')
player_A.print_state(state)
last_move = (4, 11)
player_A.print_state(state)

import construct_dnn
model = construct_dnn.construct_dnn()
player_A.initialize()
player_A.strategy((({(1, 1)}, {}), (1, 1), 1, 15))
player_A.tf_predict_u.model = model
model.load('../../auto_playok_com/tf_model')
player_A.best_action_q(state, 12342, 180, last_move, -1, 1, 1, -1)

import IPython
IPython.embed()

#player_A.find_interesting_moves(state, 120, np.zeros((15,15)), 1, 50, True)
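# Illustrative only: a tiny self-check of the assumed state_debug.txt format, where each
# row has a 4-character prefix (e.g. a row label) followed by space-separated cells.
# Hypothetical helper; it is not called anywhere and is safe to delete.
def _demo_read_state():
    example = (" 1  - x -\n"
               " 2  - o x\n"
               " 3  - - -\n")
    with open('state_example.txt', 'w') as f:
        f.write(example)
    print(read_state('state_example.txt'))  # expect a 3x3 int8 array of {0, 1, -1}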