Пример #1
0
 def calculate_loss(self, ob, last_ob, acs, feats=None, last_feat=None):
     """Evaluate ``self.loss`` over the batch in eight equal chunks.

     ``ob.shape[0]`` must be divisible by 8 (asserted). Every chunk is
     evaluated with its own ``getsess().run`` call and the per-chunk results
     are concatenated along axis 0.

     When the auxiliary task shares its features with the policy and the
     policy's ``lstm1_size`` is truthy, ``feats`` and ``last_feat`` are fed
     too; ``last_feat`` gets an extra axis inserted at position 1.
     """
     n_chunks = 8
     n = ob.shape[0]
     chunk_size = n // n_chunks
     assert n % n_chunks == 0
     print("calculating dyn loss")

     aux = self.auxiliary_task
     feed_features = aux.features_shared_with_policy and aux.policy.lstm1_size

     losses = []
     for idx in range(n_chunks):
         rows = slice(idx * chunk_size, (idx + 1) * chunk_size)
         feed = {
             self.obs: ob[rows],
             self.last_ob: last_ob[rows],
             self.ac: acs[rows],
         }
         if feed_features:
             feed[aux.ph_features] = feats[rows]
             feed[aux.ph_last_features] = np.expand_dims(last_feat[rows], axis=1)
         losses.append(getsess().run(self.loss, feed))
     return np.concatenate(losses, 0)
 def get_ac_value_nlp_eval(self, ob):
     """Run one evaluation step and carry the ``*_eval`` LSTM state forward.

     ``ob`` is fed wrapped in two nested 1-tuples. The first LSTM's
     ``c``/``h`` eval state is always fed and overwritten with the fetched
     outputs; the second LSTM's state is handled the same way only when
     ``self.lstm2_size`` is truthy.

     Returns index 0 along axis 1 of ``a_samp``, ``vpred`` and ``nlp_samp``.
     """
     use_lstm2 = bool(self.lstm2_size)

     feed_dict = {
         self.ph_ob: ((ob,),),
         self.c_in_1: self.lstm1_c_eval,
         self.h_in_1: self.lstm1_h_eval,
     }
     fetches = [self.a_samp, self.vpred, self.nlp_samp, self.c_out_1, self.h_out_1]
     if use_lstm2:
         feed_dict[self.c_in_2] = self.lstm2_c_eval
         feed_dict[self.h_in_2] = self.lstm2_h_eval
         fetches += [self.c_out_2, self.h_out_2]

     outputs = getsess().run(fetches, feed_dict=feed_dict)

     a, vpred, nlp = outputs[:3]
     self.lstm1_c_eval, self.lstm1_h_eval = outputs[3:5]
     if use_lstm2:
         self.lstm2_c_eval, self.lstm2_h_eval = outputs[5:7]
     return a[:, 0], vpred[:, 0], nlp[:, 0]
 def get_ac_value_nlp(self, ob):
     """Fetch ``a_samp``, ``vpred`` and ``nlp_samp`` for a batch of observations.

     Original author's shape notes: ``ob`` is ``[None, h, w, c]`` and
     ``ob[:, None]`` is ``[None, None, h, w, c]``.

     Returns index 0 along axis 1 of each fetched array.
     """
     fetches = [self.a_samp, self.vpred, self.nlp_samp]
     # Insert a new axis at position 1 before feeding the placeholder.
     feed = {self.ph_ob: ob[:, None]}
     actions, values, nlps = getsess().run(fetches, feed_dict=feed)
     return actions[:, 0], values[:, 0], nlps[:, 0]
Пример #4
0
 def get_ac_value_nlp(self, ob):
     """Fetch ``a_samp``, ``vpred`` and ``nlp_samp`` for a batch of observations.

     Original author's notes (translated): ``ob.shape`` is (128, 84, 84, 1);
     one axis is added before it is used as the feed, giving
     ``ob[:, None].shape`` of (128, 1, 84, 84, 4); each fetched array has
     shape (128, 1).
     NOTE(review): the channel counts in that note (1 vs 4) disagree -- confirm.

     Returns index 0 along axis 1 of each fetched array.
     """
     expanded_ob = ob[:, None]
     outputs = getsess().run(
         [self.a_samp, self.vpred, self.nlp_samp],
         feed_dict={self.ph_ob: expanded_ob},
     )
     actions, values, nlps = outputs
     return actions[:, 0], values[:, 0], nlps[:, 0]
 def get_ac_value_nlp(self, ob):
     """Run one policy step and carry the recurrent state forward.

     The stored ``lstm1_c``/``lstm1_h`` are fed as the first LSTM's input
     state and overwritten with the fetched output state. The second LSTM's
     state (``lstm2_c``/``lstm2_h``) is handled the same way only when
     ``self.lstm2_size > 0``.

     Returns index 0 along axis 1 of ``a_samp``, ``vpred`` and ``nlp_samp``.
     """
     has_lstm2 = self.lstm2_size > 0

     fetches = [self.a_samp, self.vpred, self.nlp_samp, self.c_out_1, self.h_out_1]
     feed_dict = {
         self.ph_ob: ob[:, None],
         self.c_in_1: self.lstm1_c,
         self.h_in_1: self.lstm1_h,
     }
     if has_lstm2:
         fetches += [self.c_out_2, self.h_out_2]
         feed_dict[self.c_in_2] = self.lstm2_c
         feed_dict[self.h_in_2] = self.lstm2_h

     outputs = getsess().run(fetches, feed_dict=feed_dict)

     a, vpred, nlp = outputs[:3]
     self.lstm1_c, self.lstm1_h = outputs[3:5]
     if has_lstm2:
         self.lstm2_c, self.lstm2_h = outputs[5:7]
     return a[:, 0], vpred[:, 0], nlp[:, 0]
Пример #6
0
 def get_ac_value_nlp(self, ob):
     """Fetch ``a_samp``, ``vpred`` and ``nlp_samp`` for a batch of observations.

     ``pd_logstd``, ``pd_std`` and ``pd_mean`` are fetched in the same run
     but are not returned.

     Returns index 0 along axis 1 of the first three fetched arrays.
     """
     fetches = [
         self.a_samp, self.vpred, self.nlp_samp,
         self.pd_logstd, self.pd_std, self.pd_mean,
     ]
     outputs = getsess().run(fetches, feed_dict={self.ph_ob: ob[:, None]})
     # The trailing logstd/std/mean outputs are deliberately ignored here.
     actions, values, nlps = outputs[:3]
     return actions[:, 0], values[:, 0], nlps[:, 0]
Пример #7
0
 def save_model(self, model_name, path_dir=None):
     """Save the current session with a fresh ``tf.train.Saver``.

     The checkpoint path is ``path_dir + model_name + ".ckpt"``; the prefix
     is concatenated as-is (no path separator is inserted) and defaults to
     ``"/tmp/"``. The saver is kept on ``self.saver``.
     """
     self.saver = tf.train.Saver()
     prefix = "/tmp/" if path_dir is None else path_dir
     path = prefix + model_name + ".ckpt"
     self.saver.save(getsess(), path)
     print("Model saved to path", path)
 def restore_model(self, model_name):
     """Import the meta graph for ``models/<model_name>.ckpt`` and restore it.

     After restoring, ``vpred``, ``a_samp``, ``entropy``, ``nlp_samp`` and
     ``ph_ob`` are rebound on ``self`` to the first element of the graph
     collection of the same name.
     """
     ckpt_path = "models/" + model_name + ".ckpt"
     saver = tf.train.import_meta_graph(ckpt_path + ".meta")
     saver.restore(getsess(), ckpt_path)
     for key in ("vpred", "a_samp", "entropy", "nlp_samp", "ph_ob"):
         setattr(self, key, tf.get_collection(key)[0])
Пример #9
0
    def restore_model(self, model_name, path_dir=None):
        """Restore the current session from a checkpoint with a fresh Saver.

        Without ``path_dir`` the path is ``"/tmp/" + model_name + ".ckpt"``.
        With ``path_dir`` it is ``path_dir + model_name`` -- no ``".ckpt"``
        suffix and no separator are added in that case. The saver is kept on
        ``self.saver``.
        """
        path = ("/tmp/" + model_name + ".ckpt") if path_dir is None else (path_dir + model_name)
        self.saver = tf.train.Saver()
        self.saver.restore(getsess(), path)
Пример #10
0
 def calculate_loss(self, ob, last_ob, acs):
     """Evaluate ``self.loss`` in eight equal chunks and concatenate on axis 0.

     ``ob.shape[0]`` must be divisible by 8 (asserted).
     """
     n_chunks = 8
     n = ob.shape[0]
     chunk_size = n // n_chunks
     assert n % n_chunks == 0

     losses = []
     for idx in range(n_chunks):
         rows = slice(idx * chunk_size, (idx + 1) * chunk_size)
         feed = {
             self.obs: ob[rows],
             self.last_ob: last_ob[rows],
             self.ac: acs[rows],
         }
         losses.append(getsess().run(self.loss, feed))
     return np.concatenate(losses, 0)
 def save_model(self, model_name, ep_num):
     """Save the current session under ``models/``.

     Args:
         model_name: base name of the checkpoint file.
         ep_num: when truthy the file is ``<model_name>_ep<ep_num>.ckpt``;
             otherwise (including ``0`` and ``None``) it is
             ``<model_name>_final.ckpt``.

     The ``tf.train.Saver`` that is created is kept on ``self.saver``.
     """
     self.saver = tf.train.Saver()
     # exist_ok avoids the check-then-create race of exists() + makedirs().
     os.makedirs("models", exist_ok=True)
     suffix = "_ep{}".format(ep_num) if ep_num else "_{}".format("final")
     path = "models/" + model_name + suffix + ".ckpt"
     self.saver.save(getsess(), path)
     print("Model saved to path", path)
Пример #12
0
 def calculate_loss(self, ob, last_ob, acs, feat_input, pat):
     """Evaluate ``self.loss`` in eight equal chunks.

     ``ob.shape[0]`` must be divisible by 8 (asserted). Each run of
     ``self.loss`` is unpacked into two values. When ``pat`` is truthy the
     matching slice of ``feat_input`` is additionally fed to
     ``self.features``.

     Returns a 3-tuple, each entry concatenated along axis 0: the first run
     output, the action slices that were fed, and the second run output.
     """
     n_chunks = 8
     n = ob.shape[0]
     chunk_size = n // n_chunks
     assert n % n_chunks == 0

     loss_parts, ac_parts, feat_parts = [], [], []
     # Original author's note: last_ob: pi(s_t+1) obs:pi(s_t) ac: (s0,a0)
     for idx in range(n_chunks):
         rows = slice(idx * chunk_size, (idx + 1) * chunk_size)
         ac_chunk = acs[rows]
         feed = {
             self.obs: ob[rows],
             self.last_ob: last_ob[rows],
             self.ac: ac_chunk,
         }
         if pat:
             feed[self.features] = feat_input[rows]
         chunk_loss, chunk_feat = getsess().run(self.loss, feed)
         loss_parts.append(chunk_loss)
         ac_parts.append(ac_chunk)
         feat_parts.append(chunk_feat)

     return (
         np.concatenate(loss_parts, 0),
         np.concatenate(ac_parts, 0),
         np.concatenate(feat_parts, 0),
     )
    def get_ac_value_nlp_extra_input(self, ob, vel, prev_ac, prev_rew, feats=False):
        """Run one policy step with extra inputs and carry the LSTM state forward.

        Args:
            ob: observations; fed with a new axis inserted at position 1.
            vel, prev_ac, prev_rew: converted with ``np.array`` and fed with a
                new axis inserted at position 1.
            feats: when truthy, also return ``out_feats[:, 0, :]`` taken from
                the fetched ``self.features``.

        Side effects:
            ``lstm1_c``/``lstm1_h`` are overwritten with the fetched output
            state; ``lstm2_c``/``lstm2_h`` likewise when ``lstm2_size > 0``.
            When both ``feats`` is truthy and ``lstm2_size > 0``, the new
            states are also copied to ``last_c_1``, ``last_h_1``, ``last_c_2``
            and ``last_h_2``.

        Returns:
            ``(a[:, 0], vpred[:, 0], nlp[:, 0])``, plus the features slice as
            a fourth element when ``feats`` is truthy.
        """
        feed_ac = np.expand_dims(np.array(prev_ac), axis=1)
        feed_vel = np.expand_dims(np.array(vel), axis=1)
        feed_rew = np.expand_dims(np.array(prev_rew), axis=1)

        feed_dict = {
            self.ph_ob: ob[:, None],
            self.ph_vel: feed_vel,
            self.ph_prev_ac: feed_ac,
            self.ph_prev_rew: feed_rew,
            self.c_in_1: self.lstm1_c,
            self.h_in_1: self.lstm1_h,
        }
        fetches = [
            self.features, self.a_samp, self.vpred, self.nlp_samp,
            self.c_out_1, self.h_out_1,
        ]

        has_lstm2 = self.lstm2_size > 0
        if has_lstm2:
            feed_dict.update({self.c_in_2: self.lstm2_c, self.h_in_2: self.lstm2_h})
            fetches += [self.c_out_2, self.h_out_2]

        outputs = getsess().run(fetches, feed_dict=feed_dict)

        # The single-LSTM path previously unpacked these six fetches into five
        # names (a ValueError) and never bound out_feats; unpack all six here.
        out_feats, a, vpred, nlp, self.lstm1_c, self.lstm1_h = outputs[:6]
        if has_lstm2:
            self.lstm2_c, self.lstm2_h = outputs[6:8]
            if feats:
                # Snapshot is kept exactly where the original took it: only in
                # the two-LSTM, feats-requested case.
                self.last_c_1 = self.lstm1_c
                self.last_h_1 = self.lstm1_h
                self.last_c_2 = self.lstm2_c
                self.last_h_2 = self.lstm2_h

        if feats:
            return a[:, 0], vpred[:, 0], nlp[:, 0], out_feats[:, 0, :]
        return a[:, 0], vpred[:, 0], nlp[:, 0]
 def calculate_loss(self, ob, last_ob, acs):
     """Compute a chunked loss: a first-step term plus discounted follow-up terms.

     The batch is split into 8 equal chunks along axis 0 (``ob.shape[0]``
     must be divisible by 8, asserted). ``self.loss1`` is evaluated per
     chunk; then, ``self.num_preds - 1`` times, ``self.loss2`` is evaluated
     and added with a weight that starts at ``self.pred_discount`` and is
     multiplied by ``self.pred_discount`` after every round.

     Side effect: the per-chunk ``self.first_pred_flat`` outputs are stored
     in ``self.buff_preds``.

     Returns the per-chunk totals concatenated along axis 0.
     """
     n_chunks = 8
     n = ob.shape[0]
     chunk_size = n // n_chunks
     assert n % n_chunks == 0
     sli = lambda i: slice(i * chunk_size, (i + 1) * chunk_size)
     # First pass: one run per chunk, each yielding [loss1, first_pred, first_pred_flat].
     result = [
         getsess().run(
             [self.loss1, self.first_pred, self.first_pred_flat], {
                 self.obs: ob[sli(i)],
                 self.last_ob: last_ob[sli(i)],
                 self.ac: acs[sli(i)]
             }) for i in range(n_chunks)
     ]
     self.buff_preds = [result[i][2] for i in range(n_chunks)]
     loss_total = [result[i][0] for i in range(n_chunks)]
     discount = self.pred_discount
     for p in range(self.num_preds - 1):
         # Inside the comprehension `result` is still the list from the
         # previous pass; the name is rebound only once the whole list is built.
         # The new list always has n_chunks - 1 entries (i = 1 .. n_chunks - 1),
         # and self.ac is not fed in this stage.
         # NOTE(review): for i <= p the index `i - 1 - p` is negative, so it
         # counts from the end of the previous list -- confirm this wrap-around
         # is intended.
         result = [
             getsess().run(
                 [self.loss2, self.next_pred, self.next_pred_flat], {
                     self.obs: ob[sli(i)],
                     self.last_ob: last_ob[sli(i)],
                     self.features: result[i - 1 - p][1],
                     self.extracted_features: result[i - 1 - p][2]
                 }) for i in range(1, n_chunks)
         ]
         # Keep only the first n_chunks - 1 - p losses of this pass ...
         loss2 = [result[i][0] for i in range(n_chunks - 1 - p)]
         avg_loss2 = np.sum(loss2, axis=0) / len(loss2)
         # ... and pad with their mean (p + 1 copies) so loss2 has n_chunks
         # entries again; the loop variable q itself is unused.
         for q in range(p + 1):
             loss2.append(avg_loss2)
         loss_total = [
             loss_total[i] + (discount * loss2[i]) for i in range(n_chunks)
         ]
         discount = discount * self.pred_discount
     return np.concatenate(loss_total, 0)
Пример #15
0
    def train(self):
        """Drive the agent until the rollout's step count exceeds the budget.

        Starts interaction on ``self.envs``, then repeatedly calls
        ``self.agent.step()``. Whenever the returned ``info['update']`` is
        truthy it is logged and dumped through ``logger``. The loop ends once
        ``self.agent.rollout.stats['tcount']`` is greater than
        ``self.num_timesteps``; interaction is stopped afterwards.
        """
        self.agent.start_interaction(
            self.envs,
            nlump=self.hps['nlumps'],
            intrinsic_model=self.intrinsic_model,
        )

        # The session handle is not used below; the call is kept as-is.
        sess = getsess()

        finished = False
        while not finished:
            update = self.agent.step()['update']
            if update:
                logger.logkvs(update)
                logger.dumpkvs()
            finished = self.agent.rollout.stats['tcount'] > self.num_timesteps

        self.agent.stop_interaction()
Пример #16
0
 def calculate_loss(self, ob, last_ob, acs):
     """Evaluate ``self.loss`` in eight equal chunks.

     ``ob.shape[0]`` must be divisible by 8 (asserted). Each run of
     ``self.loss`` is unpacked into two values.

     Returns a 3-tuple, each entry concatenated along axis 0: the first run
     output, the second run output, and the action slices that were fed.
     """
     n_chunks = 8
     n = ob.shape[0]
     chunk_size = n // n_chunks
     assert n % n_chunks == 0

     loss_parts, ps_parts, ac_parts = [], [], []
     # Original author's note: last_ob: pi(s_t+1) obs:pi(s_t) ac: (s0,a0)
     for idx in range(n_chunks):
         rows = slice(idx * chunk_size, (idx + 1) * chunk_size)
         ac_chunk = acs[rows]
         feed = {
             self.obs: ob[rows],
             self.last_ob: last_ob[rows],
             self.ac: ac_chunk,
         }
         chunk_loss, chunk_ps = getsess().run(self.loss, feed)
         loss_parts.append(chunk_loss)
         ps_parts.append(chunk_ps)
         ac_parts.append(ac_chunk)

     return (
         np.concatenate(loss_parts, 0),
         np.concatenate(ps_parts, 0),
         np.concatenate(ac_parts, 0),
     )
     
     """
Пример #17
0
    def calculate_reward(self, ob, last_ob, acs):
        """Evaluate ``self.reward`` in eight equal chunks, concatenated on axis 0.

        ``ob.shape[0]`` must be divisible by 8 (asserted).

        Original author's notes (translated, not verifiable from this
        snippet): this is called from the rollout code to compute the
        intrinsic reward from the states and actions met during real
        interaction; the graph is defined in ``__init__`` and the real
        ``ob``, ``last_ob`` and ``acs`` are supplied here as the feed. The
        batch is evaluated in several chunks, presumably because GPU memory
        is too small to hold it all at once.

        Shapes per those notes: ob (128, 128, 84, 84, 4),
        last_ob (128, 1, 84, 84, 4), acs (128, 128); output (128, 128, 512).
        """
        n_chunks = 8
        n = ob.shape[0]
        chunk_size = n // n_chunks
        assert n % n_chunks == 0

        rewards = []
        for idx in range(n_chunks):
            rows = slice(idx * chunk_size, (idx + 1) * chunk_size)
            feed = {
                self.obs: ob[rows],
                self.last_ob: last_ob[rows],
                self.ac: acs[rows],
            }
            rewards.append(getsess().run(self.reward, feed))
        return np.concatenate(rewards, 0)
Пример #18
0
    def log_compute_rewards(self, ob, last_ob, acs, session=None):
        """Print and return the means of five reward tensors.

        Each of ``reward_kl``, ``reward_elbo``, ``reward_elbo_var``,
        ``reward_pred_var`` and ``reward_var_mean`` is evaluated in its own
        pass over eight equal chunks (``ob.shape[0]`` must be divisible by 8,
        asserted), concatenated along axis 0, and averaged.

        Args:
            session: session to run in; ``getsess()`` is used when ``None``.

        Returns:
            ``np.array`` holding the five means in the order listed above.
        """
        n_chunks = 8
        n = ob.shape[0]
        chunk_size = n // n_chunks
        assert n % n_chunks == 0

        if session is None:
            session = getsess()

        def mean_over_chunks(tensor):
            # One session.run per chunk for this tensor, then a global mean.
            parts = []
            for idx in range(n_chunks):
                rows = slice(idx * chunk_size, (idx + 1) * chunk_size)
                parts.append(
                    session.run(tensor,
                                feed_dict={
                                    self.obs: ob[rows],
                                    self.last_ob: last_ob[rows],
                                    self.ac: acs[rows],
                                }))
            return np.mean(np.concatenate(parts, 0))

        # Intrinsic-reward terms, evaluated one tensor at a time.
        kl_mean = mean_over_chunks(self.reward_kl)
        elbo_mean = mean_over_chunks(self.reward_elbo)
        elbo_var_mean = mean_over_chunks(self.reward_elbo_var)
        pred_var_mean = mean_over_chunks(self.reward_pred_var)
        var_mean_mean = mean_over_chunks(self.reward_var_mean)

        print("Reward mean: rew_kl: ", kl_mean, ", rew_elbo:",
              elbo_mean, ", rew_elbo_var:",
              elbo_var_mean, ", rew_pred_var:",
              pred_var_mean, ", rew_var_mean:",
              var_mean_mean)

        return np.array([
            kl_mean, elbo_mean, elbo_var_mean, pred_var_mean, var_mean_mean
        ])
Пример #19
0
    def log_train_loss(self, ob, last_ob, acs, session=None):
        """Print and return the means of the loss and its three components.

        Each of ``loss``, ``rec_loss``, ``kl_loss`` and ``prior_reg_loss`` is
        evaluated in its own pass over eight equal chunks (``ob.shape[0]``
        must be divisible by 8, asserted), concatenated along axis 0, and
        averaged.

        Original author's shape notes (translated): ob (128, 128, 84, 84, 4),
        last_ob (128, 1, 84, 84, 4), acs (128, 128); output (128, 128, 512);
        the loss itself is noted as (128, 128).

        Args:
            session: session to run in; ``getsess()`` is used when ``None``.

        Returns:
            ``np.array`` holding the four means in the order listed above.
        """
        n_chunks = 8
        n = ob.shape[0]
        chunk_size = n // n_chunks
        assert n % n_chunks == 0

        if session is None:
            session = getsess()

        def mean_over_chunks(tensor):
            # One session.run per chunk for this tensor, then a global mean.
            parts = []
            for idx in range(n_chunks):
                rows = slice(idx * chunk_size, (idx + 1) * chunk_size)
                parts.append(
                    session.run(tensor,
                                feed_dict={
                                    self.obs: ob[rows],
                                    self.last_ob: last_ob[rows],
                                    self.ac: acs[rows],
                                }))
            return np.mean(np.concatenate(parts, 0))

        # Total loss first, then each of its terms.
        loss_mean = mean_over_chunks(self.loss)
        rec_mean = mean_over_chunks(self.rec_loss)
        kl_mean = mean_over_chunks(self.kl_loss)
        prior_reg_mean = mean_over_chunks(self.prior_reg_loss)

        print("DVAE loss:", loss_mean, ", rec: ", rec_mean,
              ", kl: ", kl_mean, ", prior_reg: ",
              prior_reg_mean)
        return np.array([loss_mean, rec_mean, kl_mean, prior_reg_mean])
Пример #20
0
 def calculate_err(self, ob, last_ob, acs):
     """Run ``pred_error`` and ``pred_features`` on the whole batch at once.

     Returns whatever ``getsess().run`` yields for that two-element fetch list.
     """
     fetches = [self.pred_error, self.pred_features]
     feed = {self.obs: ob, self.last_ob: last_ob, self.ac: acs}
     return getsess().run(fetches, feed)
Пример #21
0
 def get_ac_value_nlp(self, ob, err, obpred, state=None, mask=None):
     """Run one policy step fed with observations, state, mask and predictions.

     ``ob``, ``mask``, ``err`` and ``obpred`` are each fed with a new axis
     inserted at position 1; ``state`` is fed unchanged. Although ``mask``
     defaults to ``None``, it is indexed unconditionally, so callers must
     pass an array.

     Returns ``(a[:, 0], vpred[:, 0], snew, nlp[:, 0])``.
     """
     feed = {
         self.ph_ob: ob[:, None],
         self.states_ph: state,
         self.masks_ph: mask[:, None],
         self.pred_error: err[:, None],
         self.obs_pred: obpred[:, None],
     }
     fetches = [self.a_samp, self.vpred, self.snew, self.nlp_samp]
     actions, values, new_state, nlps = getsess().run(fetches, feed_dict=feed)
     return actions[:, 0], values[:, 0], new_state, nlps[:, 0]
Пример #22
0
 def inference_get_ac_value_nlp(self, ob):
     """Run the policy on ``ob`` and also return the ``joe_db1`` output.

     Unlike the training-time variant, ``ob`` is fed as given (no axis is
     inserted).

     Returns ``(action_scores, a[:, 0], vpred[:, 0], nlp[:, 0])`` where
     ``action_scores`` is the unmodified ``self.joe_db1`` output.
     """
     fetches = [self.joe_db1, self.a_samp, self.vpred, self.nlp_samp]
     outputs = getsess().run(fetches, feed_dict={self.ph_ob: ob})
     action_scores, actions, values, nlps = outputs
     return action_scores, actions[:, 0], values[:, 0], nlps[:, 0]
Пример #23
0
 def get_ac_value_nlp(self, ob):
     """Fetch ``a_samp``, ``vpred`` and ``nlp_samp`` for a batch of observations.

     ``ob`` is fed with a new axis inserted at position 1; index 0 along
     axis 1 of each fetched array is returned.
     """
     session = getsess()
     results = session.run(
         [self.a_samp, self.vpred, self.nlp_samp],
         feed_dict={self.ph_ob: ob[:, None]},
     )
     return tuple(out[:, 0] for out in results)
Пример #24
0
 def get_ac_value_nlp_2vf(self, ob):
     """Fetch the sampled action, both value heads and ``nlp_samp``.

     ``ob`` is fed with a new axis inserted at position 1.

     Returns ``(a[:, 0], vpred_int[:, 0], vpred_ext[:, 0], nlp[:, 0])``.
     """
     fetches = [self.a_samp, self.vpred_int, self.vpred_ext, self.nlp_samp]
     feed = {self.ph_ob: ob[:, None]}
     actions, values_int, values_ext, nlps = getsess().run(fetches, feed_dict=feed)
     return actions[:, 0], values_int[:, 0], values_ext[:, 0], nlps[:, 0]
Пример #25
0
    def calculate_loss(self, ob, last_ob, acs, audio):
        """Compute the chunked loss and, for non-visual feature spaces, debug audio.

        Every 200 calls (whenever ``self.updates % 200 == 1``) the session is
        checkpointed under ``<log_dir>/checkpoints/model``; ``self.updates``
        is incremented on every call.

        The batch is evaluated in up to 8 chunks along axis 0. If
        ``ob.shape[0]`` is not a multiple of 8, the final chunk also takes the
        leftover rows so that no sample is dropped. Batches with fewer than 8
        rows are evaluated as a single chunk.

        Args:
            ob, last_ob, acs: arrays fed to ``self.obs``, ``self.last_ob``
                and ``self.ac``, chunk by chunk.
            audio: passed to ``self.get_audio_features``; the result is fed
                to ``self.audio_out_features``.

        Returns:
            A 4-tuple ``(losses, prediction_audio, target_audio,
            discriminator_outputs)``. For ``feature_space`` 'joint' or
            'visual' the last three entries are ``None``. Otherwise the audio
            entries are int16 arrays reconstructed from row 0 of the
            predicted and target features, and ``discriminator_outputs`` is
            ``None`` unless ``self.train_discriminator`` is set.
        """
        if self.updates % 200 == 1:
            if self.updates == 1:
                # Create the directory directly instead of building a shell
                # command from log_dir (breaks on spaces / shell metacharacters).
                os.makedirs(self.log_dir + '/checkpoints/', exist_ok=True)
            self.saver.save(getsess(),
                            self.log_dir + '/checkpoints/model',
                            global_step=self.updates)

        self.updates += 1

        n_chunks = 8
        n = ob.shape[0]
        chunk_size = n // n_chunks
        if chunk_size == 0:
            n_chunks = 1
            chunk_size = n

        def sli(i):
            # The last chunk runs to the end of the batch so that rows beyond
            # n_chunks * chunk_size are not silently dropped.
            start = i * chunk_size
            stop = n if i == n_chunks - 1 else start + chunk_size
            return slice(start, stop)

        audio_features = self.get_audio_features(audio)
        if self.make_video:
            print("saving audio features")
            np.save(self.log_dir + '/audio_features', audio_features)

        def feed_for(i):
            rows = sli(i)
            return {
                self.audio_out_features: audio_features[rows],
                self.obs: ob[rows],
                self.last_ob: last_ob[rows],
                self.ac: acs[rows],
            }

        if self.feature_space in ('joint', 'visual'):
            losses = [
                getsess().run(self.loss, feed_for(i)) for i in range(n_chunks)
            ]
            return np.concatenate(losses, 0), None, None, None

        variables_to_run = [self.loss, self.tf_predictions]
        if self.train_discriminator:
            variables_to_run.append(self.discriminator_predictions)
        tf_outputs = [
            getsess().run(variables_to_run, feed_for(i))
            for i in range(n_chunks)
        ]
        losses = np.concatenate([chunk[0] for chunk in tf_outputs])
        predicted_audio_features = np.concatenate(
            [chunk[1] for chunk in tf_outputs])
        if self.train_discriminator:
            discriminator_outputs = np.concatenate(
                [chunk[2] for chunk in tf_outputs])
        else:
            discriminator_outputs = None

        prediction_audio = []
        target_audio = []
        for step in range(audio_features.shape[1]):
            # Only reconstruct for environment 0
            prediction_audio.extend(
                self.reconstruct_audio(predicted_audio_features[0, step]))
            target_audio.extend(
                self.reconstruct_audio(audio_features[0, step]))
        prediction_audio = np.asarray(prediction_audio).astype(np.int16)
        target_audio = np.asarray(target_audio).astype(np.int16)

        # First term is the agent's intrinsic reward; others are used for debug video
        return losses, prediction_audio, target_audio, discriminator_outputs
 def get_ac_value_nlp_eval(self, ob):
     """Fetch ``a_samp``, ``vpred`` and ``nlp_samp`` for a single evaluation input.

     ``ob`` is fed wrapped in two nested 1-tuples; index 0 along axis 1 of
     each fetched array is returned.
     """
     fetches = [self.a_samp, self.vpred, self.nlp_samp]
     wrapped_ob = ((ob, ), )
     actions, values, nlps = getsess().run(fetches, feed_dict={self.ph_ob: wrapped_ob})
     return actions[:, 0], values[:, 0], nlps[:, 0]
Пример #27
0
import tensorflow as tf
from utils import getsess
# Rebuild the graph from the saved meta file, load the newest checkpoint found
# in ./tmp/ into a fresh session, then print what getsess() returns.
with tf.Session() as sess:
    new_saver = tf.train.import_meta_graph('./tmp/model.ckpt-0.meta')
    latest_checkpoint = tf.train.latest_checkpoint('./tmp/')
    new_saver.restore(sess, latest_checkpoint)
    print(getsess())