示例#1
0
def playGame(me, thehost, sess):
    """Play as player ``me`` against the server reached through ``thehost``.

    A connection is opened with ``initClient`` and the function then loops
    forever.  Every message obtained from ``readMessage`` is used to reset
    the local ``Othello`` helper; when the first field of the message equals
    ``me`` a move is chosen and sent back as two newline-terminated numbers.

    While ``status[1]`` is below 30 the move comes from
    ``othello.smartMove()``.  Otherwise ``out`` is evaluated in ``sess`` and
    the index picked by ``nonzero_max`` is split into a pair using a width
    of 8.

    The names ``state`` and ``out`` are resolved outside this function.  The
    loop contains no exit condition, so the function never returns normally.

    NOTE(review): status[0] looks like the player to move and status[1] like
    the round counter -- confirm against readMessage.

    Args:
        me: Identifier of this player, compared against ``status[0]``.
        thehost: Host argument forwarded to ``initClient``.
        sess: Object whose ``run`` method evaluates ``out`` with a feed dict.
    """
    connection = initClient(me, thehost)
    game = Othello(0, state, me)

    while True:
        print("Read")
        status = readMessage(connection)
        game.reset(status[1], state, me)

        # Nothing to send unless the message says it is our turn.
        if not (status[0] == me):
            print("It isn't my turn")
            continue

        if status[1] < 30:
            move = game.smartMove()
        else:
            board_input = get_state(me)
            move_mask = valid_moves(getValidMoves(status[1], me))
            feed = {
                "train/inputs:0": [board_input],
                "train/moves:0": [move_mask],
            }
            q_values = sess.run(out, feed_dict=feed)[0]
            # Split the flat index into (index // 8, index % 8).
            move = divmod(nonzero_max(q_values), 8)

        # Wire format: first component, newline, second component, newline.
        sel = "".join((str(move[0]), "\n", str(move[1]), "\n"))
        print("<" + sel + ">")
        connection.send(sel)
        print("sent the message")
示例#2
0
# Bookkeeping created before the episode loop starts.
# NOTE(review): x_data / y_data look like plot series seeded with a dummy
# first point -- confirm against the code that appends to them.
x_data, y_data = [-1], [-100]

# Win / loss counters, both starting at zero.
win = lose = 0

# Mode 'w' creates or truncates win-lose.txt; the handle is not closed in
# this part of the script.
f = open('win-lose.txt', 'w')

# Episode loop: runs M episodes, wrapped in tqdm for a progress bar.
for episode in tqdm(range(M)):

    # Reset the environment; both names start out bound to the reset result.
    state = state_prime = env.reset()

    # Keep stepping until the environment reports that the game is over.
    while not env.over():

        if env.get_round() < 50:
            # Before round 50 the move is taken from env.smartMove().
            action_index = env.smartMove()
            # Flatten the two-component index: first * dim + second.
            action = action_index[1] + (action_index[0] * dim)
            state_prime, reward, terminal = env.step(action_index)
        else:
            # From round 50 on, choose via nonzero_max over agent.q evaluated
            # on the current state and the environment's valid moves.
            moves = env.valid_moves()
            q = agent.q([state], [moves])[0]
            action = nonzero_max(q)
            # Inverse of the flattening used above.
            action_index = (action//dim, action % dim)
            # On even-numbered episodes among the first 200 the chosen move is
            # replaced by env.randomMove().
            # NOTE(review): presumably exploration -- confirm intent.
            if episode < 200 and episode % 2 == 0:
                action_index = env.randomMove()
                action = action_index[1] + (action_index[0] * dim)

            state_prime, reward, terminal = env.step(action_index)
            moves_prime = env.valid_moves()
            # Rebinds q to agent.target_q evaluated on the successor state and
            # its valid moves.
            q = agent.target_q([state_prime], [moves_prime])[0]