def generate_random_state(self):
    """Build a random but legal Quoridor position and show it on the console.

    Constructs a raw state list, converts it to an immutable tuple, loads
    it into a fresh QuoridorContext and displays it.  Used from the menu
    ('random_state' choice) for debugging/inspection.

    State layout (positional):
        [0] player on move (YELLOW or GREEN)
        [1] first pawn position
        [2] second pawn position
        [3], [4] remaining wall counts for the two players
        [5] set (later frozenset) of placed wall actions
    """
    new_state = [0, 0, 0, 0, 0, set()]
    # NOTE: self.board_positions is an int (count of squares); the local
    # shadows it with the actual set of position indices.
    board_positions = frozenset(range(self.board_positions))
    new_state[0] = random.choice((YELLOW, GREEN))
    # Keep the pawn off its own goal line, otherwise the generated
    # position would presumably already be terminal — TODO confirm
    # against goal_positions semantics.
    exclude = self.goal_positions[YELLOW]
    new_state[1] = random.choice(tuple(board_positions - exclude))
    # new_state[1] = random.choice(range(72, 81))  # goal line
    # Second pawn: off its goal line and not on the first pawn's square.
    exclude = self.goal_positions[GREEN] | set([new_state[1]])
    new_state[2] = random.choice(tuple(board_positions - exclude))
    # new_state[2] = new_state[1] + 9
    all_wall_positions = tuple(range(self.wall_moves))
    # Total walls already placed by both players combined.
    place_walls = random.randint(0, 2 * STARTING_WALL_COUNT)
    # place_walls = 20
    # Split the placed walls between the two players so that neither
    # player exceeds STARTING_WALL_COUNT.
    walls_used = random.randint(
        max(0, place_walls - STARTING_WALL_COUNT),
        min(STARTING_WALL_COUNT, place_walls)
    )
    new_state[3] = STARTING_WALL_COUNT - walls_used
    new_state[4] = STARTING_WALL_COUNT - place_walls + walls_used
    # Place walls one by one; reject placements that cross an existing
    # wall or would cut off a player's path to their goal.
    while place_walls:
        action = random.choice(all_wall_positions)
        if self.is_wall_crossing(new_state[5], action):
            continue
        new_state[5].add(action)
        if not self.players_can_reach_goal(new_state):
            new_state[5].remove(action)
            continue
        place_walls -= 1
    new_state[5] = frozenset(new_state[5])
    context = QuoridorContext(self)
    context.reset(state=tuple(new_state))
    self.display_on_console(context)
    # NOTE(review): raw debug print of the context object — likely a
    # leftover; confirm whether it should stay.
    print context
def train(colors_on, special):
    """Endlessly self-train the TensorFlow player against a heuristic opponent.

    Plays games between HeuristicPlayer and TFPlayer, recording each visited
    state vector and a reward signal into the ANN's input/desired buffers,
    and running one training step every time a full batch (`ann.batch` moves)
    has been collected.  Periodically prints status and checkpoints the model.

    Runs forever (``while True`` with no break) — stop with Ctrl-C.

    :param colors_on: passed to ConsoleGame as console_colors
    :param special: passed to ConsoleGame as special_chars
    """
    game = ConsoleGame(console_colors=colors_on, special_chars=special)
    opponent = HeuristicPlayer(game)

    # INIT TENSORFLOW
    session = tf.Session()
    ann = TFPlayer(game, session)
    # Restores a previous checkpoint if one exists; returns the number of
    # games already trained so checkpoint filenames keep counting up.
    total_game_num = model_load_or_init(ann)

    context = QuoridorContext(game)
    get_players = players_creator_factory(opponent, 'heuristic', ann)
    players = get_players()

    # INIT GAME
    context.reset(players=players)
    state = input_vector_from_game_state(context)
    state = np.array(list(state)).reshape([1, ann.input])

    game_num = 0
    move = 0   # index into the current training batch
    start = time.clock()
    while True:
        # store current state
        ann.input_vectors[move, :] = state

        # proceed to next state
        opponent.play(context)
        state = input_vector_from_game_state(context)
        state = np.array(list(state)).reshape([1, ann.input])

        # update desired vector
        action = context.last_action
        # context.state[0] is the player on move (0 or 1) AFTER the move,
        # so the reward sign flips depending on who just played.
        sign = 1 - context.state[0] * 2  # 1 or -1
        ann.desired_vectors[move, action] = 100 * sign

        move += 1
        if context.is_terminal:
            game_num += 1
            total_game_num += 1
            # Start a new game immediately; the batch buffer keeps filling
            # across game boundaries.
            context.reset(players=players)
            state = input_vector_from_game_state(context)
            state = np.array(list(state)).reshape([1, ann.input])
            if game_num % SHOW_STATUS_STEP == 0:
                print_status(start, total_game_num, game_num)
            if game_num % SAVE_STATUS_STEP == 0:
                filename = TRAINING_FILENAME_FMT.format(num=total_game_num)
                ann.save(os.path.join(CKPT_MODELS_PATH, filename))
        if move == ann.batch:
            # TODO: create list of q_values:
            # y = [1, 1*lr, 1*(lr**2), ...]
            # g = -y + 1
            # and use them for learning
            session.run(ann.train_step, feed_dict=ann.feed_dict)
            move = 0
def tf_play(colors_on, special):
    """Play one human-vs-TFPlayer game on the console.

    Loads the most recent saved ANN model, pairs it with a HumanPlayer,
    and runs a single game to completion, redrawing the board after every
    move.

    :param colors_on: passed to ConsoleGame as console_colors
    :param special: passed to ConsoleGame as special_chars
    :raises Exception: if no saved model file can be found
    """
    game = ConsoleGame(console_colors=colors_on, special_chars=special)

    # INIT TENSORFLOW
    session = tf.Session()
    # Fix: previously the session leaked if model loading or the game
    # loop raised (e.g. KeyboardInterrupt from the human player); close
    # it unconditionally.
    try:
        ann = TFPlayer(game, session)
        filename = ann.last_model_filename()
        if filename is None:
            raise Exception('Could not load ANN data.')
        ann.load(filename)

        kwargs = {
            'messages': game.messages,
            'game_controls': game.GAME_CONTROLS,
            'fail_callback': game.wrong_human_move,
        }
        hp = HumanPlayer(game, **kwargs)

        context = QuoridorContext(game)
        get_players = players_creator_factory(hp, 'human', ann)
        context.reset(players=get_players())

        while not context.is_terminal:
            game.display_on_console(context)
            # context.current['player'] is the callable that makes one move.
            context.current['player'](context)
        # Show the final, terminal position as well.
        game.display_on_console(context)
    finally:
        session.close()
def handle_menu(self):
    """Run the console main-menu loop until the user quits.

    Repeatedly prints the menu, reads a choice and dispatches on its
    mode: 'random_state', 'save', 'load', 'game' or 'train'.  A single
    QuoridorContext instance is reused across games so that save/load
    of the game history works.

    get_menu_input() returns either a string ('unknown'/'quit') or a
    dict with at least a 'mode' key and, for game/train modes, a
    'player_names' key.
    """
    context = QuoridorContext(self)
    game_mode = 'menu'
    while game_mode != 'quit':
        self.print_menu()
        choice = self.get_menu_input()
        # String results are control signals, not menu choices.
        # NOTE(review): a string other than 'unknown'/'quit' would fall
        # through to choice['mode'] and raise — presumably those are the
        # only two strings get_menu_input can return; confirm.
        if isinstance(choice, basestring):
            if choice == 'unknown':
                print self.messages['unknown_choice']
                continue
            elif choice == 'quit':
                return
        game_mode = choice['mode']
        if game_mode == 'quit':
            return
        elif game_mode == 'random_state':
            self.generate_random_state()
            continue
        elif game_mode == 'save':
            # Saving only makes sense once at least one move was played.
            if not context.history:
                print self.messages['cannot_save']
                continue
            game_mode = self.save_menu(context)
            continue
        if game_mode == 'load':
            # load_menu may hand control straight to a game on the
            # loaded context (without resetting players below).
            game_mode = self.load_menu(context)
            if game_mode == 'game':
                game_mode = self.handle_game(context)
            continue
        # 'game' / 'train' modes: build fresh players and reset the
        # context before playing.
        players = self.get_players(choice['player_names'])
        context.reset(players=players)
        if game_mode == 'game':
            game_mode = self.handle_game(context)
            continue
        elif game_mode == 'train':
            game_mode = self.train(context)
            continue
def measure(colors_on, special, opponent_type):
    """Play one game between RandomPlayerWithPath and the chosen opponent
    and display the final board.

    :param colors_on: passed to ConsoleGame as console_colors
    :param special: passed to ConsoleGame as special_chars
    :param opponent_type: key into the OPPONENTS registry selecting the
        opponent player class

    Fix: the original wrapped everything in ``while True: ... break``,
    which always executed exactly once — the dead loop (and a block of
    commented-out TensorFlow setup) is removed; behavior is unchanged.
    If multi-game measurement is wanted later, reintroduce a loop with a
    real game counter.
    """
    game = ConsoleGame(console_colors=colors_on, special_chars=special)
    opponent = OPPONENTS[opponent_type](game)
    ann = RandomPlayerWithPath(game)
    context = QuoridorContext(game)

    get_players = players_creator_factory(opponent, opponent_type, ann)
    context.reset(players=get_players())
    # Let the players alternate moves until the game ends.
    while not context.is_terminal:
        context.current['player'](context)
    game.display_on_console(context)