    def test_create_network(self):
        input_nodes = 20
        hidden_nodes = (50, 40, 30)
        input_layer, output_layer, variables = create_network(
            input_nodes, hidden_nodes)
        self.assertSequenceEqual(input_layer.get_shape().as_list(),
                                 [None, input_nodes])
        self.assertSequenceEqual(output_layer.get_shape().as_list(),
                                 [None, input_nodes])
        self.assertEqual(len(variables), (len(hidden_nodes) + 1) * 2)

    def test_save_and_load_network(self):
        try:
            file_name = 'test.p'
            input_nodes = 20
            hidden_nodes = (50, 40, 30)
            _, _, variables1 = create_network(input_nodes, hidden_nodes)
            _, _, variables2 = create_network(input_nodes, hidden_nodes)

            with tf.Session() as session:
                session.run(tf.initialize_all_variables())

                save_network(session, variables1, file_name)
                load_network(session, variables2, file_name)

                for var1, var2 in zip(variables1, variables2):
                    np.testing.assert_array_almost_equal(
                        session.run(var1), session.run(var2))
        finally:
            try:
                os.remove(file_name)
            except OSError:
                pass
Example #3
        board_state = game_spec.new_board()
        number_moves = random.randint(*NUMBER_RANDOM_RANGE)
        side = 1
        for _ in range(number_moves):
            board_state = game_spec.apply_move(board_state, random.choice(list(game_spec.available_moves(board_state))),
                                               side)
            if game_spec.has_winner(board_state) != 0:
                # start again if we hit an already winning position
                continue

            side = -side
        return board_state


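# Policy (reinforcement) network: one output per possible move (game_spec.outputs()).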
reinforcement_input_layer, reinforcement_output_layer, reinforcement_variables = create_network(
    game_spec.board_squares(),
    HIDDEN_NODES_REINFORCEMENT,
    game_spec.outputs())

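# Value network: a single scalar output (no softmax) estimating the position's value.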
value_input_layer, value_output_layer, value_variables = create_network(game_spec.board_squares(), HIDDEN_NODES_VALUE,
                                                                        output_nodes=1, output_softmax=False)

target_placeholder = tf.placeholder("float", (None, 1))
error = tf.reduce_sum(tf.square(target_placeholder - value_output_layer))

train_step = tf.train.RMSPropOptimizer(LEARN_RATE).minimize(error)

with tf.Session() as session:
    session.run(tf.initialize_all_variables())

    load_network(session, reinforcement_variables, REINFORCEMENT_NETWORK_PATH)
    """If we had a database of games this would load and return it...

    Returns:

    """
    raise Exception(
        "If we had a database of tic-tac-toe games this would load them")


HIDDEN_NODES = (100, 80, 60, 40)  # number of hidden layer neurons
BATCH_SIZE = 100  # number of games between parameter updates
LEARN_RATE = 1e-4
NETWORK_FILE_PATH = 'current_network.p'
game_spec = TicTacToeGameSpec()

input_layer, output_layer, variables = create_network(
    game_spec.board_squares(), HIDDEN_NODES, output_nodes=game_spec.outputs())
actual_move_placeholder = tf.placeholder("float", (None, game_spec.outputs()))

error = tf.reduce_sum(tf.square(actual_move_placeholder - output_layer))
train_step = tf.train.RMSPropOptimizer(LEARN_RATE).minimize(error)

with tf.Session() as session:
    session.run(tf.initialize_all_variables())

    if os.path.isfile(NETWORK_FILE_PATH):
        print("loading existing network")
        load_network(session, variables, NETWORK_FILE_PATH)

    episode_number = 1

    positions_train, positions_test = load_games()
Example #5
def train_policy_gradient(network_file_path,
                          save_network_file_path=None,
                          learn_rate=1e-3,
                          number_of_games=50000,
                          print_results_every=1000,
                          batch_size=100):

    print 'parameters => LR : ', learn_rate, ' Batch Size : ', batch_size
    save_network_file_path = save_network_file_path or network_file_path
    actual_move_placeholder = tf.placeholder("float", shape=(None, 100))
    input_layer, output_layer, variables = create_network(100, (100, 100, 100),
                                                          output_softmax=False)

    error = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(
            logits=output_layer, labels=actual_move_placeholder))
    #error = tf.reduce_sum(tf.square(tf.subtract(actual_move_placeholder, output_layer)), reduction_indices=1)
    train_step = tf.train.AdamOptimizer(learn_rate).minimize(error)

    with tf.Session() as session:
        session.run(tf.initialize_all_variables())

        if network_file_path and os.path.isfile(network_file_path):
            print("loading pre-existing network")
            load_network(session, variables, network_file_path)

        mini_batch_board_states, mini_batch_moves = [], []

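        # my_player records each (board state, greedy move) pair as a supervised training example.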
        def my_player(board_state, side):
            #printboard(board_state)
            mini_batch_board_states.append(np.ravel(board_state) * side)
            a1 = Agent(side, lossval=-1)
            move_tuple = a1.random_greedy(board_state)
            move = np.zeros(100)
            move[move_tuple[0] * 10 + move_tuple[1]] = 1.
            mini_batch_moves.append(move)
            return move_tuple

        def make_training_move(board_state, side):
            a1 = Agent(side, lossval=-1)
            move = a1.action(board_state)
            return move

        game_length = 0
        #count = set()
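        # Play games between the Agent players; the moves collected by my_player form each mini-batch.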
        for episode_number in range(1, number_of_games):
            #print 'episode no ',episode_number
            if bool(random.getrandbits(1)):
                #print 'network goes first'
                board_state = emptyboard()
                player_turn = 1
                while True:
                    _available_moves = list(available_moves(board_state))

                    if len(_available_moves) == 0:
                        break
                    if player_turn > 0:
                        #move = random_player(board_state, 1)
                        move = make_training_move(board_state, 1)
                        #_ = my_player(board_state, 1)
                        #print 'network move position ', move
                    else:
                        move = my_player(board_state, -1)
                        #print 'player move position ', move

                    if move not in _available_moves:
                        print 'illegal move'
                        break

                    board_state = apply_move(board_state, move, player_turn)
                    #print board_state

                    winner = gameover(board_state)
                    if winner != 0 and winner != 2:
                        break
                    player_turn = -player_turn
                #printboard(board_state)
                #count.add(tuple(np.array(board_state).ravel()))
            else:
                #print 'player goes first'
                board_state = emptyboard()
                player_turn = -1
                while True:
                    _available_moves = list(available_moves(board_state))

                    if len(_available_moves) == 0:
                        break
                    if player_turn > 0:
                        #move = random_player(board_state, 1)
                        move = make_training_move(board_state, 1)
                        #_ = my_player(board_state, 1)
                        #print 'network move position ', move
                    else:
                        move = my_player(board_state, -1)
                        #print 'player move position ', move

                    if move not in _available_moves:
                        print 'illegal move'
                        break

                    board_state = apply_move(board_state, move, player_turn)
                    #print board_state

                    winner = gameover(board_state)
                    if winner != 0 and winner != 2:
                        break
                    player_turn = -player_turn
                #printboard(board_state)
                #count.add(tuple(np.array(board_state).ravel()))

            last_game_length = len(mini_batch_board_states) - game_length
            game_length += last_game_length

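            # Every batch_size games, run one training step on the collected moves and report accuracy.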
            if episode_number % batch_size == 0:
                np_mini_batch_board_states = np.array(
                    mini_batch_board_states).reshape(
                        game_length,
                        *input_layer.get_shape().as_list()[1:])

                ol, _ = session.run(
                    [output_layer, train_step],
                    feed_dict={
                        input_layer: np_mini_batch_board_states,
                        actual_move_placeholder: mini_batch_moves
                    })

                # print np.array(np_mini_batch_board_states).reshape(10,10)
                # print 'output_layer_move', np.argmax(ol)
                # print 'our_moves', np.argmax(mini_batch_moves)
                #print np.array(ol).shape,np.array(mini_batch_moves).shape
                correct = np.sum(
                    np.argmax(ol, axis=1) == np.argmax(mini_batch_moves,
                                                       axis=1))
                del mini_batch_board_states[:]
                del mini_batch_moves[:]

                print episode_number, ': ', 'accuracy ', correct / float(
                    game_length)
                #print 'distinct final states ', len(count)

                game_length = 0

            if episode_number % print_results_every == 0:
                if network_file_path:
                    save_network(session, variables, save_network_file_path)

        if network_file_path:
            print 'saving final network'
            save_network(session, variables, save_network_file_path)

    return variables
Example #6
def train_policy_gradient(network_file_path,
                          save_network_file_path=None,
                          learn_rate=1e-3,
                          number_of_games=50000,
                          print_results_every=1000,
                          batch_size=100):

    print 'parameters => LR : ', learn_rate, ' Batch Size : ', batch_size
    save_network_file_path = save_network_file_path or network_file_path
    target_placeholder = tf.placeholder("float", shape=(None, 1))
    input_layer, output_layer, variables = create_network(
        10, (100, 100, 100, 100, 100), output_nodes=1, output_softmax=False)

    error = tf.reduce_sum(tf.square(target_placeholder - output_layer))
    train_step = tf.train.RMSPropOptimizer(learn_rate).minimize(error)

    with tf.Session() as session:
        session.run(tf.initialize_all_variables())

        if network_file_path and os.path.isfile(network_file_path):
            print("loading pre-existing network")
            load_network(session, variables, network_file_path)

        def make_training_move(board_state, side):
            a1 = Agent(side, lossval=-1)
            move = a1.action(board_state)
            return move

        def make_move(board_state, side):
            a1 = Agent(side, lossval=-1)
            move = a1.random_greedy(board_state)
            return move

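        # Generate self-play games and label every position in a game with that game's final winner.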
        board_states_training = {}
        board_states_test = []
        episode_number = 0
        board_states_training_input = {}

        while len(board_states_training_input) < TRAIN_SAMPLES + TEST_SAMPLES:
            #if len(board_states_training_input)%100 == 0:
            print 'total games ', len(board_states_training_input)
            board_state = emptyboard()
            current_board_states_test = []
            if bool(random.getrandbits(1)):
                side = 1
            else:
                side = -1
            while True:
                board_state = apply_move(board_state,
                                         make_training_move(board_state, side),
                                         side)
                current_board_states_test.append(deepcopy(board_state))
                winner = gameover(board_state)
                if winner != 0:
                    if winner == 2:
                        winner = 0
                    break
                side = -side
            for i in range(len(current_board_states_test)):
                board_state_flat = tuple(np.ravel(
                    current_board_states_test[i]))
                # only accept the board_state if not already in the dict
                if board_state_flat not in board_states_training_input:
                    board_states_training[state_key(
                        current_board_states_test[i])] = float(winner)
                    board_states_training_input[board_state_flat] = 1

        # take a random selection from training into a test set
        for _ in range(TEST_SAMPLES):
            sample = random.choice(list(board_states_training.keys()))
            board_states_test.append((sample, board_states_training[sample]))
            del board_states_training[sample]

        board_states_training = list(board_states_training.items())

        test_error = session.run(error,
                                 feed_dict={
                                     input_layer:
                                     [x[0] for x in board_states_test],
                                     target_placeholder:
                                     [[x[1]] for x in board_states_test]
                                 })

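        # Train until the held-out test error stops decreasing (simple early stopping).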
        while True:
            np.random.shuffle(board_states_training)
            train_error = 0

            for start_index in range(
                    0,
                    len(board_states_training) - batch_size + 1, batch_size):
                mini_batch = board_states_training[start_index:start_index +
                                                   batch_size]

                batch_error, _ = session.run(
                    [error, train_step],
                    feed_dict={
                        input_layer: [x[0] for x in mini_batch],
                        target_placeholder: [[x[1]] for x in mini_batch]
                    })
                train_error += batch_error

            new_test_error = session.run(error,
                                         feed_dict={
                                             input_layer:
                                             [x[0] for x in board_states_test],
                                             target_placeholder:
                                             [[x[1]]
                                              for x in board_states_test]
                                         })

            print(
                "episode: %s train_error: %s new_test_error: %s test_error: %s"
                % (episode_number, train_error, new_test_error, test_error))

            if new_test_error > test_error:
                print("train error went up, stopping training")
                break

            test_error = new_test_error
            episode_number += 1

        if network_file_path:
            print 'saving final network'
            save_network(session, variables, save_network_file_path)

    return variables
Example #7
NAMES = {0: '_', 1: 'X', -1: 'O'}

def printboard(state):
    cells = []
    print ' ',
    for i in range(BOARD_SIZE):
        print '{0}'.format(str(i).center(5)),
    print '\n'
    for i in range(BOARD_SIZE):
        print i,
        for j in range(BOARD_SIZE):
            print '{0}'.format(NAMES[state[i][j]].center(5)),
        print('\n')

if __name__ == '__main__':
    input_layer, output_layer, variables = create_network(100,(100,100,100))

    with tf.Session() as session:
        session.run(tf.initialize_all_variables())
        # MoonGo_supervised_cross_prob MoonGo_reinforcement
        load_network(session, variables, 'MoonGo_supervised_cross_prob.pickle')
        while 1:
            board_state = emptyboard()
            player_turn = 1

            while True:
                printboard(board_state)
                _available_moves = list(available_moves(board_state))

                if len(_available_moves) == 0:
                    print("no moves left, game ended a draw")
Example #8
File: game.py  Project: lavesh11/MoonGo
def printboard(state):
    cells = []
    print ' ',
    for i in range(BOARD_SIZE):
        print '{0}'.format(str(i).center(5)),
    print '\n'
    for i in range(BOARD_SIZE):
        print i,
        for j in range(BOARD_SIZE):
            print '{0}'.format(NAMES[state[i][j]].center(5)),
        print('\n')


if __name__ == '__main__':
    input_layer, output_layer, variables = create_network(10,
                                                          (10, 10, 10, 10, 10),
                                                          output_nodes=1,
                                                          output_softmax=False)

    with tf.Session() as session:
        session.run(tf.initialize_all_variables())
        # MoonGo_supervised_cross_prob MoonGo_reinforcement
        load_network(session, variables, 'MoonGo_reinforcement.pickle')
        while 1:
            board_state = emptyboard()
            player_turn = 1

            while True:
                printboard(board_state)
                _available_moves = list(available_moves(board_state))

                if len(_available_moves) == 0: