# Make a path for our model to be saved in. if not os.path.exists(path): os.makedirs(path) print("Start training.") with tf.Session() as sess: sess.run(init) if load_model: print('Loading Model...') ckpt = tf.train.get_checkpoint_state(path) saver.restore(sess, ckpt.model_checkpoint_path) for i in range(num_episodes): episodeBuffer = qnetwork.ExperienceBuffer() # Reset environment and get first new observation game = Game(field_size, field_size, mines) s = game.state() d = False rAll = 0 j = 0 actions = [] # The Q-Network while j < max_epLength: # If the network takes more moves than needed for the field, cancel episode j += 1 # Choose an action by greedily (with e chance of random action) from the Q-network if np.random.rand(1) < e or total_steps < pre_train_steps: a = np.random.randint(0, num_actions) else: a = sess.run(mainQN.predict, feed_dict={ mainQN.input: np.reshape(s, [-1, field_size, field_size, 2])