def train(self, episodes=1000, max_steps=800, plot_rewards=True):
    """Train the DDPG agent for `episodes` episodes.

    Runs `run_episode` up to `max_steps` steps per episode, records per-episode
    score and step count, and (when `plot_rewards`) plots the scores and saves
    the score/step/time arrays under ./train_data/.
    """
    # Initialize target network weights from the online networks.
    self.actor.update_target_model(copy=True)
    self.critic.update_target_model(copy=True)
    scores, steps = np.empty(episodes), np.empty(episodes)
    start = time.time()
    for e in range(episodes):
        score, step = self.run_episode(max_steps)
        scores[e], steps[e] = score, step
        print("Episode:", e, " steps:", step, " score:", score, " time:", time.time() - start)
    # NOTE(review): directory is ensured but no checkpoint is written in this
    # variant (other variants call saver.save here) — confirm this is intended.
    ensure_saved_models_dir()
    if plot_rewards:
        t_time = time.time() - start
        print("Mean score:", np.mean(scores), " Total steps:", np.sum(steps), " total time:", t_time)
        plot(scores)
        plot_running_avg(scores)
        # '_' separator keeps e.g. (state_size=1, n_neighbors=23) distinct from
        # (state_size=12, n_neighbors=3); matches the tf-session trainer's naming.
        prefix = "./train_data/ddpg_enc_actions" + str(self.state_size) + '_' + str(self.n_neighbors)
        np.save(prefix + "_scores", scores)
        np.save(prefix + "_time", t_time)
        np.save(prefix + "_steps", steps)
def train_exploiting_greedy(self, episodes=1000, max_steps=1000, plot_rewards=True):
    """Train by replaying the reversal trace of a greedy solver.

    For each of `episodes` random start states, a greedy reversal sort produces
    a trace of intermediate permutations; each permutation in the trace is then
    fed (three passes, last-to-first) into `run_episode` as a forced start so the
    agent learns from states of decreasing difficulty.
    """
    scores = []
    e = 0  # global episode counter across all traces/passes (for logging only)
    for _ in range(episodes):
        trace = []
        # Fills `trace` in place with the permutations visited by the greedy sort.
        greedy_reversal_sort(self.env.observation_space.sample(), trace)
        for __ in range(3):
            # Replay the trace backwards: states closest to sorted come first.
            for permutation in trace[::-1]:
                score = self.run_episode(max_steps, forced=permutation)
                scores.append(score)
                print("Episode:", e, " score:", score, " epsilon:", self.epsilon)
                e += 1
            print()
        print()
    self.model.save_weights(FINAL_WEIGHTS_PATH)
    scores = np.array(scores)
    if plot_rewards:
        plot(scores)
        plot_running_avg(scores)
def train(self, episodes=1000, max_steps=1000, plot_rewards=True):
    """Train for `episodes` episodes, checkpointing the tf session each episode.

    Records per-episode score and step count; when `plot_rewards`, prints a
    summary, saves score/step/time arrays under ./train_data/, and plots steps.
    """
    scores, steps = np.empty(episodes), np.empty(episodes)
    start = time.time()
    # Build the Saver once, outside the loop: constructing tf.train.Saver()
    # every episode adds new save ops to the graph each iteration.
    saver = tf.train.Saver()
    for e in range(episodes):
        score, step, loss = self.run_episode(max_steps)
        scores[e], steps[e] = score, step
        print("Episode:", e, " steps:", step, " score:", score, " loss:", loss, " time:", time.time() - start)
        # Checkpoint after every episode so a crash loses at most one episode.
        saver.save(self.session, self.train_path)
    if plot_rewards:
        t_time = time.time() - start
        print("Mean step:", np.mean(steps), " Total steps:", np.sum(steps), " total time:", t_time)
        prefix = "./train_data/ddpg_enc_actions" + str(self.state_size) + '_' + str(self.n_neighbors)
        np.save(prefix + "_scores", scores)
        np.save(prefix + "_time", t_time)
        np.save(prefix + "_steps", steps)
        plot(steps)
        plot_running_avg(steps)
def train(self, episodes=1000, max_steps=1000, plot_rewards=True):
    """Train the DDQN agent for `episodes` episodes.

    Saves model weights after every episode; when `plot_rewards`, plots scores
    and saves score/step/time arrays under ./train_data/.
    """
    scores, steps = np.empty(episodes), np.empty(episodes)
    start = time.time()
    for e in range(episodes):
        score, step = self.run_episode(max_steps)
        scores[e], steps[e] = score, step
        print("Episode:", e, " steps:", step, " score:", score, " epsilon:", self.epsilon, " time:", time.time() - start)
        # Checkpoint after every episode so a crash loses at most one episode.
        # (An earlier save-every-100-episodes variant was dead commented-out
        # code and has been removed.)
        ensure_saved_models_dir()
        self.model.save_weights(FINAL_WEIGHTS_PATH)
    if plot_rewards:
        t_time = time.time() - start
        print("Mean score:", np.mean(scores), " Total steps:", np.sum(steps), " total time:", t_time)
        plot(scores)
        plot_running_avg(scores)
        np.save("./train_data/ddqn_" + str(self.state_size) + "_scores", scores)
        np.save("./train_data/ddqn_" + str(self.state_size) + "_time", t_time)
        np.save("./train_data/ddqn_" + str(self.state_size) + "_steps", steps)
def train(self, episodes=1000, max_steps=1000, plot_rewards=True):
    """Run `episodes` training episodes, persist the model weights, and
    optionally plot the per-episode scores and their running average."""
    history = np.empty(episodes)
    for idx in range(episodes):
        result = self.run_episode(max_steps)
        history[idx] = result
        print("Episode:", idx, " score:", result, " epsilon:", self.epsilon)
    # Persist final weights once training is done.
    ensure_saved_models_dir()
    self.model.save_weights(FINAL_WEIGHTS_PATH)
    if plot_rewards:
        plot(history)
        plot_running_avg(history)
def train(self, n, f_eps, plot_rewards=False, plot_best=False):
    """Play `n` episodes with an epsilon schedule.

    Parameters:
        n: number of episodes to play.
        f_eps: callable mapping episode index -> epsilon for that episode.
        plot_rewards: plot the running average of rewards at the end.
        plot_best: plot the best-so-far reward curve at the end.
    """
    total_rewards = np.empty(n)
    bests = np.empty(n)
    best = None
    for i in range(n):
        eps = f_eps(i)
        total_reward = self._play_one(eps)
        # Track the best reward seen so far.
        if best is None or total_reward > best:
            best = total_reward
        bests[i] = best
        total_rewards[i] = total_reward
        # Original guarded this with `if i % 1 == 0:`, which is always true —
        # log every episode unconditionally.
        print("Episode:", i, "Reward:", total_reward, "Best:", best, "Eps:", eps)
    if plot_rewards:
        plot_running_avg(total_rewards)
    if plot_best:
        plot(bests, 'Bests')
def train(self, episodes=1000, max_steps=800, plot_rewards=True):
    """Train the tf DQN for `episodes` episodes, checkpointing each episode.

    Optionally pre-fills replay memory, aborts on NaN loss, and (when
    `plot_rewards`) saves score/step/time arrays under ./train_data/ and
    plots steps and the running average of losses.
    """
    scores, steps, losses = np.zeros(episodes), np.zeros(episodes), np.zeros(episodes)
    start = time.time()
    # Saver is built once here; it is reused for the per-episode checkpoint below.
    saver = tf.train.Saver()
    if self.fill_mem:
        self.fill_memory()
    for e in range(episodes):
        score, step, loss = self.run_episode(max_steps)
        scores[e], steps[e], losses[e] = score, step, loss
        print("Episode:", e, " steps:", step, " score: %.1f" % score," loss:", loss, " epsilon:", self.epsilon, " time:", time.time() - start)
        # Training diverged — stop before the NaNs propagate into the checkpoint.
        if math.isnan(loss):
            break
        ensure_saved_models_dir()
        saver.save(self.session, self.train_path)
    if plot_rewards:
        t_time = time.time() - start
        print("Mean score:", np.mean(scores), " Total steps:", np.sum(steps), " total time:", t_time)
        np.save("./train_data/ddqn_tf_" + str(self.state_size) + "_scores", scores)
        np.save("./train_data/ddqn_tf_" + str(self.state_size) + "_time", t_time)
        np.save("./train_data/ddqn_tf_" + str(self.state_size) + "_steps", steps)
        plot(steps)
        plot_running_avg(steps)
        plot_running_avg(losses, title="Losses")
def train(self, episodes=1000, max_steps=800, plot_rewards=True):
    """Train the DDPG agent with early stopping and periodic memory refill.

    Stops early once more than 50 consecutive episodes hit the step cap in the
    second half of training. Saves a tf checkpoint at the end; when
    `plot_rewards`, saves score/step/time arrays under ./train_data/ and plots
    the step counts.
    """
    # Initialize target network weights from the online networks.
    self.actor.update_target_model(copy=True)
    self.critic.update_target_model(copy=True)
    scores, steps = np.empty(episodes), np.empty(episodes)
    start = time.time()
    break_flag = 0  # consecutive episodes that hit max_steps
    for e in range(episodes):
        score, step = self.run_episode(max_steps)
        scores[e], steps[e] = score, step
        print("Episode:", e, " steps:", step, " score:", score, " time:", time.time() - start)
        # Periodically refill replay memory while episodes keep hitting the cap.
        if e % 50 == 0 and step == max_steps and self.fill_mem:
            self.fill_memory()
        break_flag = break_flag + 1 if step == max_steps else 0
        # Early stop: stuck at the step cap for 50+ episodes past the halfway mark.
        if break_flag > 50 and e >= episodes / 2:
            break
    ensure_saved_models_dir()
    saver = tf.train.Saver()
    saver.save(self.session, self.train_path)
    if plot_rewards:
        t_time = time.time() - start
        print("Mean score:", np.mean(scores), " Total steps:", np.sum(steps), " total time:", t_time)
        # '_' separator keeps e.g. (state_size=1, n_neighbors=23) distinct from
        # (state_size=12, n_neighbors=3); matches the tf-session trainer's naming.
        prefix = "./train_data/ddpg_enc_actions" + str(self.state_size) + '_' + str(self.n_neighbors)
        np.save(prefix + "_scores", scores)
        np.save(prefix + "_time", t_time)
        np.save(prefix + "_steps", steps)
        plot(steps)
        plot_running_avg(steps)