Example #1
def playGame(me, thehost, sess):
    # Connect to the game server and set up a local board representation.
    sock = initClient(me, thehost)
    othello = Othello(0, state, me)

    while True:
        print("Read")
        # status is assumed to be (player to move, round counter) from the server.
        status = readMessage(sock)
        othello.reset(status[1], state, me)

        if status[0] == me:
            if status[1] < 30:
                # Opening (first 30 rounds assumed): use the handcrafted heuristic.
                action_index = othello.smartMove()
            else:
                # Afterwards: query the trained Q-network for the best move.
                _state = get_state(me)
                _moves = valid_moves(getValidMoves(status[1], me))
                q = sess.run(out, feed_dict={
                    "train/inputs:0": [_state], "train/moves:0": [_moves]
                })[0]
                action = nonzero_max(q)
                # Map the flat action index back to (row, column) on the 8x8 board.
                action_index = (action // 8, action % 8)

            # The move is sent as two newline-terminated integers: row, then column.
            sel = str(action_index[0]) + "\n" + str(action_index[1]) + "\n"
            print("<" + sel + ">")
            sock.send(sel)
            print("sent the message")
        else:
            print("It isn't my turn")

    return
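
# Note: initClient, readMessage, getValidMoves, get_state, valid_moves, and
# the `out` tensor are defined elsewhere in the project and are not shown in
# this excerpt; the only wire format visible here is the reply sent above.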
Example #2
import numpy as np
import tensorflow as tf

sess = tf.InteractiveSession()

# Set hyperparameters and variables
M = 1000               # number of episodes
LEARNING_RATE = 0.001  # base learning rate
GAMMA = 0.99           # discount factor

BUFFER_SIZE = 100000   # replay buffer capacity
BATCH_SIZE = 64        # minibatch size
RANDOM_SEED = 1234
dim = 8                # board dimension (8x8 Othello)

# Set up environment
env = Othello(dim)
state = state_prime = env.reset()
action = np.zeros(len(state))  # one entry per board square

# Create the deep Q-network; the literal 0.001 is assumed to be the
# target-network soft-update rate (tau).
agent = DeepQNetwork(sess, state, action, LEARNING_RATE, 0.001, GAMMA)
sess.run(tf.global_variables_initializer())  # initialize_all_variables() is deprecated
agent.update_target_network()

# Initialize the experience replay buffer
Replay = ReplayBuffer(BUFFER_SIZE, random_seed=RANDOM_SEED, prioritized=False)
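
# For context, a minimal sketch of how the objects above typically fit into a
# standard DQN training loop. Every method name used below (agent.predict,
# agent.predict_target, agent.train, Replay.add, Replay.sample_batch,
# Replay.size, env.step) is an assumption for illustration; the original loop
# is not shown in this excerpt.
def train_sketch():
    for episode in range(M):
        state = env.reset()
        done = False
        while not done:
            # Greedy action over predicted Q-values (a real loop would add
            # epsilon-greedy exploration here).
            q_values = agent.predict([state])[0]
            action = int(np.argmax(q_values))

            state_prime, reward, done = env.step(action)
            Replay.add(state, action, reward, state_prime, done)

            if Replay.size() > BATCH_SIZE:
                s, a, r, s2, d = Replay.sample_batch(BATCH_SIZE)
                # Bellman target: r + GAMMA * max_a' Q_target(s2, a'),
                # zeroed at terminal states via (1 - d).
                y = r + GAMMA * np.max(agent.predict_target(s2), axis=1) * (1 - d)
                agent.train(s, a, y)
                agent.update_target_network()  # move target net toward online net

            state = state_prime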


def nonzero_max(actions):
    # Returns the index of the highest-valued nonzero (i.e. legal) action;
    # used by playGame in Example #1.
    indices = np.nonzero(actions)[0]
    mapping = []
    for index in indices: