# assumes the project-level imports for TicTacToeGameSpec and
# create_positions_set used elsewhere in this repository
from unittest import TestCase


class TestCreatePositionsSet(TestCase):
    def setUp(self):
        self._game_spec = TicTacToeGameSpec()

    def test_create_positions(self):
        number_of_positions = 100
        positions = create_positions_set(
            self._game_spec, number_of_positions,
            self._game_spec.get_random_player_func())

        # the helper should return at least the requested number of positions
        self.assertGreaterEqual(len(positions), number_of_positions)
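
# Hedged usage sketch (assumes the same project-level imports as the test
# above, and that the returned positions are plain picklable Python objects):
# build a batch of positions with the random player and save it for later
# evaluation. The file name and variable names here are illustrative only.
import pickle

sketch_game_spec = TicTacToeGameSpec()
sketch_positions = create_positions_set(sketch_game_spec, 100,
                                        sketch_game_spec.get_random_player_func())
with open("tic_tac_toe_positions.p", "wb") as f:
    pickle.dump(sketch_positions, f)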
Example #2
# fragment of the training-script setup: args comes from argparse, and
# functools, random, game_spec and create_network are assumed to be imported
# or defined earlier in the original file
if not args.hidden_layers:
    args.hidden_layers = (100, 100, 100)

# create_network_func = functools.partial(create_network, game_spec.board_squares(), (100, 100, 100))
create_network_func = functools.partial(create_network,
                                        game_spec.board_squares(),
                                        args.hidden_layers)

network_file_path = 'current_network'
for n in args.hidden_layers:
    network_file_path = network_file_path + ("_%05d" % n)

network_file_path = network_file_path + ".p"

random_opponent = game_spec.get_random_player_func()
perfect_opponent = game_spec.get_perfect_player()


def mixed_opponent(*args, **kwds):
    # choose the random or the perfect player with equal probability on each call
    opponent = random.choice([random_opponent, perfect_opponent])
    return opponent(*args, **kwds)


if args.opponent == "random":
    opponent_func = random_opponent
elif args.opponent == "perfect":
    opponent_func = perfect_opponent
elif args.opponent == "mixed":
    opponent_func = mixed_opponent
else:
    # assumption: the original snippet is truncated here; rejecting unknown
    # values is the natural fallback
    raise ValueError("Unknown opponent type: %s" % args.opponent)
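
# Hedged usage sketch: play one game between the random player and whichever
# opponent was selected, reusing game_spec.play_game exactly as the training
# loop below does (a +1/-1/0 first-player result is assumed, and sketch_result
# is just an illustrative name).
sketch_result = game_spec.play_game(random_opponent, opponent_func)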
Example #3
    # fragment of a policy-gradient training loop; collections, random and
    # numpy (as np) are assumed to be imported at module level, along with
    # session, input_layer, output_layer, game_spec and the constants used here
    mini_batch_board_states, mini_batch_moves, mini_batch_rewards = [], [], []
    results = collections.deque(maxlen=PRINT_RESULTS_EVERY_X)

    def make_training_move(board_state, side):
        # store the flattened board from the current player's perspective
        mini_batch_board_states.append(np.ravel(board_state) * side)
        # sample a move from the network's output distribution
        move = get_stochastic_network_move(session, input_layer, output_layer,
                                           board_state, side)
        mini_batch_moves.append(move)
        return game_spec.flat_move_to_tuple(move.argmax())

    for episode_number in range(1, NUMBER_OF_GAMES_TO_RUN):
        # randomize if going first or second
        if bool(random.getrandbits(1)):
            reward = game_spec.play_game(make_training_move,
                                         game_spec.get_random_player_func())
        else:
            # the network plays second, so flip the sign of the first-player reward
            reward = -game_spec.play_game(game_spec.get_random_player_func(),
                                          make_training_move)

        results.append(reward)

        last_game_length = len(mini_batch_board_states) - len(
            mini_batch_rewards)

        # scale the reward by game length so that winning quickly is better than
        # winning slowly, and losing slowly is better than losing quickly
        reward /= float(last_game_length)

        mini_batch_rewards += ([reward] * last_game_length)
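        # e.g. assuming play_game returns +1 for a win, a game the network won
        # in 3 of its own moves adds +1/3 to each of those moves, while a
        # 5-move win adds only +1/5 per move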

        if episode_number % BATCH_SIZE == 0: