Example #1
        j = 0
        actions = []
        # The Q-Network
        while j < max_epLength:  # End the episode if the agent takes more moves than the field requires
            j += 1
            # Choose an action from the Q-network greedily (with probability e of a random action); act fully at random until pre-training is over
            if np.random.rand(1) < e or total_steps < pre_train_steps:
                a = np.random.randint(0, num_actions)
            else:
                a = sess.run(mainQN.predict,
                             feed_dict={
                                 mainQN.input:
                                 np.reshape(s, [-1, field_size, field_size, 2])
                             })[0]
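            # Convert the flat action index into (row, column) coordinates on the field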
            y, x = np.unravel_index(a, (field_size, field_size))
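            # Apply the move; the game returns the next state, the reward, and the done flag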
            s1, r, d = game.action(y, x, False)
            total_steps += 1
            episodeBuffer.add(
                np.reshape(np.array([s, a, r, s1, d]),
                           [1, 5]))  # Save experience to episode buffer.
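            # Each stored experience is the tuple (state, action, reward, next_state, done)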

            if total_steps > pre_train_steps and total_steps % update_freq == 0:
                trainBatch = myBuffer.sample(
                    batch_size)  # Get a random batch of experiences.
                # Below we perform the Double-DQN update to the target Q-values
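                # (the main network selects the best next action, the target network evaluates its value)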
                Q1 = sess.run(mainQN.predict,
                              feed_dict={
                                  mainQN.input:
                                  np.reshape(np.stack(trainBatch[:, 3]),
                                             [-1, field_size, field_size, 2])
                              })
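                # Q1: indices of the actions the main network would take in the next states s1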