Пример #1
0
# Make a path for our model to be saved in.
# exist_ok=True lets makedirs succeed when the directory is already present,
# so no separate os.path.exists() check (and the window between that check
# and the creation) is needed.
os.makedirs(path, exist_ok=True)

print("Start training.")
with tf.Session() as sess:
    sess.run(init)
    if load_model:
        print('Loading Model...')
        ckpt = tf.train.get_checkpoint_state(path)
        saver.restore(sess, ckpt.model_checkpoint_path)
    for i in range(num_episodes):
        episodeBuffer = qnetwork.ExperienceBuffer()
        # Reset environment and get first new observation
        game = Game(field_size, field_size, mines)
        s = game.state()
        d = False
        rAll = 0
        j = 0
        actions = []
        # The Q-Network
        while j < max_epLength:  # If the network takes more moves than needed for the field, cancel episode
            j += 1
            # Choose an action by greedily (with e chance of random action) from the Q-network
            if np.random.rand(1) < e or total_steps < pre_train_steps:
                a = np.random.randint(0, num_actions)
            else:
                a = sess.run(mainQN.predict,
                             feed_dict={
                                 mainQN.input:
                                 np.reshape(s, [-1, field_size, field_size, 2])