def playGame(me, thehost, sess):
    """Connect to the game server and answer every turn that belongs to `me`.

    The loop never exits on its own: it keeps reading status messages until
    the process is stopped or one of the helpers / the socket raises.

    Relies on module-level names that are not defined in this function:
    `initClient`, `readMessage`, `Othello`, `get_state`, `valid_moves`,
    `getValidMoves`, `nonzero_max`, `state` and `out`.

    Args:
        me: Identifier of this player; compared against `status[0]` to decide
            whether it is our turn, and forwarded to the helpers.
        thehost: Server host, forwarded to `initClient`.
        sess: Object exposing `run(fetches, feed_dict=...)`; presumably a
            TensorFlow session holding the trained network -- TODO confirm.
    """
    # NOTE(review): block structure was reconstructed from a
    # whitespace-collapsed source; confirm the nesting against the original.
    board_dim = 8        # side length used to turn a flat action into (row, col)
    opening_rounds = 30  # below this value of status[1] the scripted move is used

    sock = initClient(me, thehost)
    othello = Othello(0, state, me)

    while True:
        print("Read")
        status = readMessage(sock)
        # status[1] is presumably the current round number -- TODO confirm.
        othello.reset(status[1], state, me)

        if status[0] != me:
            print("It isn't my turn")
            continue

        if status[1] < opening_rounds:
            action_index = othello.smartMove()
        else:
            _state = get_state(me)
            _moves = valid_moves(getValidMoves(status[1], me))
            q = sess.run(out, feed_dict={
                "train/inputs:0": [_state],
                "train/moves:0": [_moves],
            })[0]
            action = nonzero_max(q)
            action_index = (action // board_dim, action % board_dim)

        # Wire format: row and column, each terminated by a newline.
        sel = str(action_index[0]) + "\n" + str(action_index[1]) + "\n"
        print("<" + sel + ">")
        # sendall() keeps writing until the whole move is on the wire;
        # plain send() may transmit only a prefix of the buffer.
        sock.sendall(sel)
        print("sent the message")
x_data = [-1] y_data = [-100] win = 0 lose = 0 f = open('win-lose.txt', 'w') for episode in tqdm(range(M)): state = state_prime = env.reset() while not env.over(): if env.get_round() < 50: action_index = env.smartMove() action = action_index[1] + (action_index[0] * dim) state_prime, reward, terminal = env.step(action_index) else: moves = env.valid_moves() q = agent.q([state], [moves])[0] action = nonzero_max(q) action_index = (action//dim, action % dim) if episode < 200 and episode % 2 == 0: action_index = env.randomMove() action = action_index[1] + (action_index[0] * dim) state_prime, reward, terminal = env.step(action_index) moves_prime = env.valid_moves() q = agent.target_q([state_prime], [moves_prime])[0]