Example #1
    def train(self, episodes=1000, max_steps=800, plot_rewards=True):
        # Initialize target network weights
        self.actor.update_target_model(copy=True)
        self.critic.update_target_model(copy=True)
        scores, steps = np.empty(episodes), np.empty(episodes)
        start = time.time()
        for e in range(episodes):
            score, step = self.run_episode(max_steps)
            scores[e], steps[e] = score, step
            print("Episode:", e, "  steps:", step, "  score:", score,
                  "  time:",
                  time.time() - start)

        ensure_saved_models_dir()

        if plot_rewards:
            t_time = time.time() - start
            print("Mean score:", np.mean(scores), " Total steps:",
                  np.sum(steps), " total time:", t_time)
            plot(scores)
            plot_running_avg(scores)
            np.save(
                "./train_data/ddpg_enc_actions" + str(self.state_size) +
                str(self.n_neighbors) + "_scores", scores)
            np.save(
                "./train_data/ddpg_enc_actions" + str(self.state_size) +
                str(self.n_neighbors) + "_time", t_time)
            np.save(
                "./train_data/ddpg_enc_actions" + str(self.state_size) +
                str(self.n_neighbors) + "_steps", steps)

    def train_exploiting_greedy(self,
                                episodes=1000,
                                max_steps=1000,
                                plot_rewards=True):
        scores = []
        e = 0
        for _ in range(episodes):
            trace = []
            greedy_reversal_sort(self.env.observation_space.sample(), trace)
            for __ in range(3):
                for permutation in trace[::-1]:
                    score = self.run_episode(max_steps, forced=permutation)
                    scores.append(score)
                    print("Episode:", e, "  score:", score, "  epsilon:",
                          self.epsilon)
                    e += 1
                print()
            print()

        self.model.save_weights(FINAL_WEIGHTS_PATH)

        scores = np.array(scores)
        if plot_rewards:
            plot(scores)
            plot_running_avg(scores)
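
Every example on this page calls plot and plot_running_avg without showing them. A minimal sketch of what plot_running_avg could look like, assuming matplotlib and a trailing window of 100 episodes (both the library choice and the window size are assumptions, not taken from the examples):

    import matplotlib.pyplot as plt
    import numpy as np

    def plot_running_avg(scores, title="Running Average", window=100):
        # Mean over the trailing `window` episodes (shorter at the start).
        scores = np.asarray(scores, dtype=float)
        running_avg = np.array([scores[max(0, i - window + 1):i + 1].mean()
                                for i in range(len(scores))])
        plt.plot(running_avg)
        plt.title(title)
        plt.xlabel("Episode")
        plt.show()
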
Example #3
    def train(self, episodes=1000, max_steps=1000, plot_rewards=True):
        scores, steps = np.empty(episodes), np.empty(episodes)
        start = time.time()
        break_flag = 0
        for e in range(episodes):
            score, step, loss = self.run_episode(max_steps)
            scores[e], steps[e] = score, step
            print("Episode:", e, "  steps:", step, "  score:", score,
                  "  loss:", loss, "  time:",
                  time.time() - start)
            #break_flag = break_flag+1 if step == max_steps else 0
            #if break_flag > 60: break
        saver = tf.train.Saver()
        saver.save(self.session, self.train_path)

        if plot_rewards:
            t_time = time.time() - start
            print("Mean step:", np.mean(steps), " Total steps:", np.sum(steps),
                  " total time:", t_time)
            np.save(
                "./train_data/ddpg_enc_actions" + str(self.state_size) + '_' +
                str(self.n_neighbors) + "_scores", scores)
            np.save(
                "./train_data/ddpg_enc_actions" + str(self.state_size) + '_' +
                str(self.n_neighbors) + "_time", t_time)
            np.save(
                "./train_data/ddpg_enc_actions" + str(self.state_size) + '_' +
                str(self.n_neighbors) + "_steps", steps)
            plot(steps)
            plot_running_avg(steps)
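
Example #3 persists the session with tf.train.Saver().save(self.session, self.train_path). For completeness, a hedged restore sketch in the same TensorFlow 1.x style; it assumes the graph has already been rebuilt (e.g. by constructing the agent again) and that train_path is the same path used above:

    import tensorflow as tf

    with tf.Session() as session:
        saver = tf.train.Saver()
        saver.restore(session, train_path)  # train_path: the path passed to saver.save
        # ... run evaluation episodes with the restored weights ...
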
Example #4
    def train(self, episodes=1000, max_steps=1000, plot_rewards=True):
        scores, steps = np.empty(episodes), np.empty(episodes)
        start = time.time()
        for e in range(episodes):
            score, step = self.run_episode(max_steps)
            scores[e], steps[e] = score, step
            print("Episode:", e, "  steps:", step, "  score:", score,
                  "  epsilon:", self.epsilon, "  time:",
                  time.time() - start)
            # if e % 100 == 0:
            #     ensure_saved_models_dir()
            #     self.model.save_weights(FINAL_WEIGHTS_PATH)
            #     print("Weights Saved")
        ensure_saved_models_dir()
        self.model.save_weights(FINAL_WEIGHTS_PATH)

        if plot_rewards:
            t_time = time.time() - start
            print("Mean score:", np.mean(scores), " Total steps:",
                  np.sum(steps), " total time:", t_time)
            plot(scores)
            plot_running_avg(scores)
            np.save("./train_data/ddqn_" + str(self.state_size) + "_scores",
                    scores)
            np.save("./train_data/ddqn_" + str(self.state_size) + "_time",
                    t_time)
            np.save("./train_data/ddqn_" + str(self.state_size) + "_steps",
                    steps)

    def train(self, episodes=1000, max_steps=1000, plot_rewards=True):
        scores = np.empty(episodes)
        for e in range(episodes):
            score = self.run_episode(max_steps)
            scores[e] = score
            print("Episode:", e, "  score:", score, "  epsilon:", self.epsilon)

        ensure_saved_models_dir()
        self.model.save_weights(FINAL_WEIGHTS_PATH)

        if plot_rewards:
            plot(scores)
            plot_running_avg(scores)
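
Several examples call ensure_saved_models_dir() and write to FINAL_WEIGHTS_PATH without defining either. One possible definition; the directory and file names below are assumptions for illustration:

    import os

    SAVED_MODELS_DIR = "./saved_models"                           # assumed directory name
    FINAL_WEIGHTS_PATH = SAVED_MODELS_DIR + "/final_weights.h5"   # assumed file name

    def ensure_saved_models_dir():
        # Create the weights directory if it does not exist yet.
        os.makedirs(SAVED_MODELS_DIR, exist_ok=True)
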
Example #6
    def train(self, n, f_eps, plot_rewards=False, plot_best=False):
        total_rewards = np.empty(n)
        bests = np.empty(n)
        best = None
        for i in range(n):
            eps = f_eps(i)
            total_reward = self._play_one(eps)
            if best is None or total_reward > best:
                best = total_reward
            bests[i] = best
            total_rewards[i] = total_reward
            if i % 1 == 0:  # modulus of 1 prints every episode; raise it to print less often
                print("Episode:", i, "Reward:", total_reward, "Best:", best,
                      "Eps:", eps)

        if plot_rewards:
            plot_running_avg(total_rewards)

        if plot_best:
            plot(bests, 'Bests')
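
Unlike the other examples, Example #6 takes the exploration rate as a function f_eps of the episode index. A schedule that could be passed in is exponential decay toward a floor; the constants and the agent call below are illustrative, not from the source:

    def f_eps(i, eps_start=1.0, eps_min=0.01, decay=0.995):
        # Exponentially decaying exploration rate, clipped at eps_min.
        return max(eps_min, eps_start * decay ** i)

    # agent.train(n=1000, f_eps=f_eps, plot_rewards=True)   # hypothetical agent instance
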
Example #7
	def train(self, episodes=1000, max_steps=800, plot_rewards=True):
		scores, steps, losses = np.zeros(episodes), np.zeros(episodes), np.zeros(episodes)
		start = time.time()
		saver = tf.train.Saver()
		if self.fill_mem:
			self.fill_memory()
		for e in range(episodes):
			score, step, loss = self.run_episode(max_steps)
			scores[e], steps[e], losses[e] = score, step, loss
			print("Episode:", e, "  steps:", step, "  score: %.1f" % score,"  loss:", loss, "  epsilon:", self.epsilon, "  time:", time.time() - start)
			if math.isnan(loss): break
		ensure_saved_models_dir()
		saver.save(self.session, self.train_path)

		if plot_rewards:
			t_time = time.time() - start
			print("Mean score:", np.mean(scores), " Total steps:", np.sum(steps), " total time:", t_time)
			np.save("./train_data/ddqn_tf_" + str(self.state_size) + "_scores", scores)
			np.save("./train_data/ddqn_tf_" + str(self.state_size) + "_time", t_time)
			np.save("./train_data/ddqn_tf_" + str(self.state_size) + "_steps", steps)
			plot(steps)
			plot_running_avg(steps)
			plot_running_avg(losses, title="Losses")
Example #8
    def train(self, episodes=1000, max_steps=800, plot_rewards=True):
        # Initialize target network weights
        self.actor.update_target_model(copy=True)
        self.critic.update_target_model(copy=True)
        scores, steps = np.empty(episodes), np.empty(episodes)
        start = time.time()
        break_flag = 0
        for e in range(episodes):
            score, step = self.run_episode(max_steps)
            scores[e], steps[e] = score, step
            print("Episode:", e, "  steps:", step, "  score:", score,
                  "  time:",
                  time.time() - start)
            if e % 50 == 0 and step == max_steps and self.fill_mem:
                self.fill_memory()  # refill replay memory after a full-length episode
            # Stop early after 50 consecutive max-length episodes in the second half.
            break_flag = break_flag + 1 if step == max_steps else 0
            if break_flag > 50 and e >= episodes / 2: break
        ensure_saved_models_dir()
        saver = tf.train.Saver()
        saver.save(self.session, self.train_path)

        if plot_rewards:
            t_time = time.time() - start
            print("Mean score:", np.mean(scores), " Total steps:",
                  np.sum(steps), " total time:", t_time)
            np.save(
                "./train_data/ddpg_enc_actions" + str(self.state_size) +
                str(self.n_neighbors) + "_scores", scores)
            np.save(
                "./train_data/ddpg_enc_actions" + str(self.state_size) +
                str(self.n_neighbors) + "_time", t_time)
            np.save(
                "./train_data/ddpg_enc_actions" + str(self.state_size) +
                str(self.n_neighbors) + "_steps", steps)
            plot(steps)
            plot_running_avg(steps)
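
Since np.save appends a .npy extension, the curves written by these train methods can be reloaded later for comparison. A short usage sketch mirroring the path construction above, with hypothetical parameter values:

    import numpy as np

    # Hypothetical values; the real file name embeds state_size and n_neighbors.
    state_size, n_neighbors = 10, 5
    scores = np.load("./train_data/ddpg_enc_actions" + str(state_size) +
                     str(n_neighbors) + "_scores.npy")
    steps = np.load("./train_data/ddpg_enc_actions" + str(state_size) +
                    str(n_neighbors) + "_steps.npy")
    print("Mean score:", scores.mean(), " Mean episode length:", steps.mean())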