Example #1
    def saveModel(self,
                  s_batch,
                  a_batch,
                  r_batch,
                  entropy_record,
                  end_of_video=False):
        # Ignore the first chunk, since we don't have control over it.
        actor_gradient, critic_gradient, td_batch = a3c.compute_gradients(
            s_batch=np.stack(s_batch, axis=0),
            a_batch=np.vstack(a_batch),
            r_batch=np.vstack(r_batch),
            terminal=end_of_video,
            actor=self.actor,
            critic=self.critic)
        td_loss = np.mean(td_batch)

        self.actor_gradient_batch.append(actor_gradient)
        self.critic_gradient_batch.append(critic_gradient)

        myprint("====")
        myprint("Master: Quality: Epoch", self.epoch)
        myprint("TD_loss", td_loss, "Avg_reward", np.mean(r_batch),
                "Avg_entropy", np.mean(entropy_record))
        myprint("====")

        summary_str = self.sess.run(self.summary_ops,
                                    feed_dict={
                                        self.summary_vars[0]: td_loss,
                                        self.summary_vars[1]: np.mean(r_batch),
                                        self.summary_vars[2]: np.mean(entropy_record)
                                    })

        self.writer.add_summary(summary_str, self.epoch)
        self.writer.flush()

        self.entropy_record = []

        if len(self.actor_gradient_batch) >= GRADIENT_BATCH_SIZE:

            assert len(self.actor_gradient_batch) == len(
                self.critic_gradient_batch)

            for i in range(len(self.actor_gradient_batch)):
                self.actor.apply_gradients(self.actor_gradient_batch[i])
                self.critic.apply_gradients(self.critic_gradient_batch[i])

            self.actor_gradient_batch = []
            self.critic_gradient_batch = []

            self.epoch += 1
            if self.epoch % MODEL_SAVE_INTERVAL == 0:
                # Save the neural net parameters to disk.
                save_path = self.saver.save(
                    self.sess, self.summary_dir + "/nn_model_ep_" +
                    str(self.epoch) + ".ckpt")
                myprint("Model saved in file: %s" % save_path)

        return self.getParams()
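Example #1 is the centralized (master) variant: it computes A3C gradients for one worker trajectory, queues them, applies every queued gradient once GRADIENT_BATCH_SIZE of them have accumulated, checkpoints every MODEL_SAVE_INTERVAL epochs, and returns the refreshed parameters via getParams(). Below is a minimal, self-contained sketch of that accumulate-then-apply pattern; ToyNet and its plain SGD update are hypothetical stand-ins for the real actor/critic networks and are not part of the example above.

import numpy as np

GRADIENT_BATCH_SIZE = 16
MODEL_SAVE_INTERVAL = 100


class ToyNet:
    """Hypothetical stand-in for the actor/critic: one weight vector, plain SGD."""

    def __init__(self, dim, lr=0.01):
        self.w = np.zeros(dim)
        self.lr = lr

    def apply_gradients(self, grad):
        self.w -= self.lr * grad


def accumulate_and_apply(net, gradient_batch, new_gradient, epoch):
    """Queue one gradient; once the batch is full, apply every queued gradient,
    clear the queue, bump the epoch, and checkpoint periodically."""
    gradient_batch.append(new_gradient)
    if len(gradient_batch) >= GRADIENT_BATCH_SIZE:
        for grad in gradient_batch:
            net.apply_gradients(grad)
        gradient_batch.clear()
        epoch += 1
        if epoch % MODEL_SAVE_INTERVAL == 0:
            np.save("nn_model_ep_%d.npy" % epoch, net.w)  # checkpoint stand-in
    return epoch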
Example #2
    def saveModel(self, end_of_video=False):
        if self._vReadOnly:
            return
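        # With an IPC queue configured, ship the trajectory to the central process
        # and refresh the local networks from the parameters it sends back.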
        if self.ipcQueue:
            self.ipcQueue[0].put({
                "id": self.ipcId,
                "cmd": IPC_CMD_UPDATE,
                "pid": self.pid,
                "data": [
                    self.s_batch, self.a_batch, self.r_batch,
                    self.entropy_record, end_of_video
                ]
            })
            res = None
            while True:
                res = self.ipcQueue[1].get()
                pid = res["pid"]
                res = res["res"]
                if pid == self.pid:
                    break
            actor_net_params, critic_net_params = res
            self.actor.set_network_params(actor_net_params)
            self.critic.set_network_params(critic_net_params)

            del self.s_batch[:]
            del self.a_batch[:]
            del self.r_batch[:]
            del self.entropy_record[:]

            return

        # Ignore the first chunk, since we don't have control over it.
        actor_gradient, critic_gradient, td_batch = a3c.compute_gradients(
            s_batch=np.stack(self.s_batch, axis=0),
            a_batch=np.vstack(self.a_batch),
            r_batch=np.vstack(self.r_batch),
            terminal=end_of_video,
            actor=self.actor,
            critic=self.critic)
        td_loss = np.mean(td_batch)

        self.actor_gradient_batch.append(actor_gradient)
        self.critic_gradient_batch.append(critic_gradient)

        myprint("====")
        myprint("Quality: Epoch", self.epoch)
        myprint("TD_loss", td_loss, "Avg_reward", np.mean(self.r_batch),
                "Avg_entropy", np.mean(self.entropy_record))
        myprint("====")

        summary_str = self.sess.run(self.summary_ops,
                                    feed_dict={
                                        self.summary_vars[0]: td_loss,
                                        self.summary_vars[1]: np.mean(self.r_batch),
                                        self.summary_vars[2]: np.mean(self.entropy_record)
                                    })

        self.writer.add_summary(summary_str, self.epoch)
        self.writer.flush()

        self.entropy_record = []

        if len(self.actor_gradient_batch) >= GRADIENT_BATCH_SIZE:

            assert len(self.actor_gradient_batch) == len(
                self.critic_gradient_batch)

            for i in range(len(self.actor_gradient_batch)):
                self.actor.apply_gradients(self.actor_gradient_batch[i])
                self.critic.apply_gradients(self.critic_gradient_batch[i])

            self.actor_gradient_batch = []
            self.critic_gradient_batch = []

            self.epoch += 1
            if self.epoch % MODEL_SAVE_INTERVAL == 0:
                # Save the neural net parameters to disk.
                save_path = self.saver.save(
                    self.sess, self.summary_dir + "/nn_model_ep_" +
                    str(self.epoch) + ".ckpt")
                myprint("Model saved in file: %s" % save_path)

        del self.s_batch[:]
        del self.a_batch[:]
        del self.r_batch[:]
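In the IPC branch of Example #2, the worker posts its trajectory to ipcQueue[0] and then blocks on ipcQueue[1] until a reply tagged with its own pid carries back fresh actor/critic parameters. A minimal sketch of the matching master-side loop is shown below, assuming the two queues are multiprocessing.Queue objects shared with the workers; master_loop and compute_update are hypothetical names (compute_update would play the role of Example #1's saveModel) and are not part of the example above.

IPC_CMD_UPDATE = "update"  # assumed value; the real constant is defined elsewhere


def master_loop(request_queue, reply_queue, compute_update):
    """Consume worker updates from the shared queue, run the central training
    step, and answer each worker with the refreshed network parameters."""
    while True:
        req = request_queue.get()          # blocks until a worker posts an update
        if req.get("cmd") != IPC_CMD_UPDATE:
            continue                       # ignore anything that is not an update
        s_batch, a_batch, r_batch, entropy_record, end_of_video = req["data"]
        actor_params, critic_params = compute_update(
            s_batch, a_batch, r_batch, entropy_record, end_of_video)
        # The reply is tagged with the worker's pid so each worker can pick out its own.
        reply_queue.put({"pid": req["pid"],
                         "res": (actor_params, critic_params)})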