def playGame(me, thehost, sess):
    """Run the client game loop for player ``me``.

    Opens a connection with ``initClient(me, thehost)`` and then loops:
    each iteration reads a status message, resets the local ``othello``
    helper from it and, when ``status[0] == me``, chooses a move and
    sends it as the row and the column, each followed by a newline.

    Move selection:
      * ``status[1] < 30``: ``othello.smartMove()``.
      * otherwise: the tensor ``out`` is evaluated through ``sess.run``
        and ``nonzero_max`` picks a flat index, which is split into
        ``(index // 8, index % 8)``.

    Args:
        me: Player identifier, compared against ``status[0]``.
        thehost: Host argument forwarded to ``initClient``.
        sess: Object whose ``run(fetches, feed_dict=...)`` evaluates the
            network (presumably a TensorFlow session -- confirm).

    The loop has no exit condition of its own; it ends only when one of
    the called helpers raises or terminates the process.

    Relies on names defined outside this function: ``state``, ``out``,
    ``initClient``, ``readMessage``, ``Othello``, ``get_state``,
    ``valid_moves``, ``getValidMoves`` and ``nonzero_max``.
    """
    sock = initClient(me, thehost)
    othello = Othello(0, state, me)

    # Single-argument print(...) produces the same output as the print
    # statement on Python 2 and is also valid Python 3 syntax.
    while True:
        print("Read")
        status = readMessage(sock)
        turn = status[0]
        # NOTE(review): status[1] looks like the round counter -- confirm
        # against readMessage.
        game_round = status[1]
        othello.reset(game_round, state, me)

        if turn != me:
            print("It isn't my turn")
            continue

        if game_round < 30:
            action_index = othello.smartMove()
        else:
            _state = get_state(me)
            _moves = valid_moves(getValidMoves(game_round, me))
            feed = {
                "train/inputs:0": [_state],
                "train/moves:0": [_moves],
            }
            # Only the first row of the fetched result is used.
            q = sess.run(out, feed_dict=feed)[0]
            action = nonzero_max(q)
            # Flat index -> (row, column) with a row length of 8.
            action_index = (action // 8, action % 8)

        sel = str(action_index[0]) + "\n" + str(action_index[1]) + "\n"
        print("<" + sel + ">")
        # sendall() keeps writing until the whole message is out; plain
        # send() may transmit only part of it and its return value was
        # never checked. Assumes initClient returns a standard socket.
        sock.sendall(sel)
        print("sent the message")
sess = tf.InteractiveSession() # Set hyper parameters and variables M = 1000; '''Number of episodes''' LEARNING_RATE = 0.001; '''Base learning rate''' GAMMA = 0.99; '''Discount factor''' BUFFER_SIZE = 100000 BATCH_SIZE = 64 RANDOM_SEED = 1234 dim = 8 # Set up environment env = Othello(dim) state = state_prime = env.reset() action = np.zeros(len(state)) # create deep q network agent = DeepQNetwork(sess, state, action, LEARNING_RATE, 0.001, GAMMA) sess.run(tf.initialize_all_variables()) agent.update_target_network() # Initialize replay buffer Replay Replay = ReplayBuffer(BUFFER_SIZE, random_seed=RANDOM_SEED, prioritized=False) def nonzero_max(actions): indices = np.nonzero(actions)[0] mapping = [] for index in indices: