class TestCreatePositionsSet(TestCase):
    """Tests for create_positions_set using the tic-tac-toe game spec."""

    def setUp(self):
        self._game_spec = TicTacToeGameSpec()

    def test_create_positions(self):
        # Ask for a fixed number of positions generated by random play.
        requested = 100
        positions = create_positions_set(
            self._game_spec,
            requested,
            self._game_spec.get_random_player_func(),
        )
        # The generator must produce at least as many positions as requested.
        self.assertGreaterEqual(len(positions), requested)
if not args.hidden_layers: args.hidden_layers = (100, 100, 100) # create_network_func = functools.partial(create_network, game_spec.board_squares(), (100, 100, 100)) create_network_func = functools.partial(create_network, game_spec.board_squares(), args.hidden_layers) network_file_path = 'current_network' for n in args.hidden_layers: network_file_path = network_file_path + ("_%05d" % n) network_file_path = network_file_path + ".p" random_opponent = game_spec.get_random_player_func() perfect_opponent = game_spec.get_perfect_player() def mixed_opponent(*args, **kwds): opponent = random.choice([random_opponent, perfect_opponent]) return opponent(*args, **kwds) if args.opponent == "random": opponent_func = random_opponent elif args.opponent == "perfect": opponent_func = perfect_opponent elif args.opponent == "mixed": opponent_func = mixed_opponent else:
# Accumulators for one training mini-batch: board states seen by the network,
# the moves it chose, and (filled in after each game) the per-move rewards.
mini_batch_board_states, mini_batch_moves, mini_batch_rewards = [], [], []
# Rolling window of recent game outcomes, sized to the reporting interval.
results = collections.deque(maxlen=PRINT_RESULTS_EVERY_X)

def make_training_move(board_state, side):
    # Record the flattened board from the current player's perspective:
    # multiplying by `side` makes the network always see itself as +1.
    mini_batch_board_states.append(np.ravel(board_state) * side)
    move = get_stochastic_network_move(session, input_layer, output_layer, board_state, side)
    mini_batch_moves.append(move)
    # Convert the flat one-hot move back to the game's (row, col) tuple.
    return game_spec.flat_move_to_tuple(move.argmax())

# NOTE(review): range(1, N) runs N-1 episodes, not N — confirm intended.
for episode_number in range(1, NUMBER_OF_GAMES_TO_RUN):
    # randomize if going first or second
    if bool(random.getrandbits(1)):
        reward = game_spec.play_game(make_training_move, game_spec.get_random_player_func())
    else:
        # play_game reports the result from the first player's perspective,
        # so negate it when the network plays second.
        reward = -game_spec.play_game(game_spec.get_random_player_func(),
                                      make_training_move)

    results.append(reward)

    # Moves the network made this game = states recorded but not yet rewarded.
    last_game_length = len(mini_batch_board_states) - len(
        mini_batch_rewards)

    # we scale here so winning quickly is better than winning slowly and
    # losing slowly is better than losing quickly
    reward /= float(last_game_length)

    # Assign the same scaled reward to every move of the finished game.
    mini_batch_rewards += ([reward] * last_game_length)

    if episode_number % BATCH_SIZE == 0: