示例#1
0
    def restore_old(self, policy, val_func, scaler, restore_path):
        """Rebuild policy, value function and scaler from a saved checkpoint.

        Fresh ``Policy`` and ``NNValueFunction`` objects are constructed with
        ``restore_flag=True`` and a SavedModel is loaded into each one's
        session; the scaler and episode counter are unpickled from disk.

        Args:
            policy: existing policy; only ``obs_dim``, ``act_dim`` and
                ``kl_targ`` are read from it to size the replacement.
            val_func: existing value function; only ``obs_dim`` is read.
            scaler: ignored -- it is overwritten by the unpickled scaler.
            restore_path: checkpoint path prefix, used verbatim (it is NOT
                joined with ``self.checkpoints_dir``). The files read are
                ``<prefix>.policy``, ``<prefix>.val_func`` and
                ``<prefix>.scaler``.

        Returns:
            Tuple ``(policy, val_func, scaler, episode)`` holding the
            restored objects and the episode value stored with the scaler.
        """
        prefix = restore_path

        print("restoring checkpoint from:", prefix)

        # Imported at call time, exactly where the original did it.
        from policy import Policy
        from value_function import NNValueFunction

        def load_saved_model(model, suffix, marker_before, marker_after):
            # Load the SavedModel at prefix + suffix into model's session,
            # dumping the graph's variables before and after for debugging.
            graph = model.g
            with graph.as_default():
                print(marker_before)
                Checkpoint.dump_vars(graph)
                tf.saved_model.loader.load(
                    model.sess,
                    [tf.saved_model.tag_constants.TRAINING],
                    prefix + suffix,
                )
                print(marker_after)
                Checkpoint.dump_vars(graph)
            # NOTE(review): presumably re-binds placeholder attributes such as
            # obs_ph to tensors of the freshly loaded graph -- confirm.
            model._placeholders()
            print("YYYY:", model.obs_ph)

        policy = Policy(
            policy.obs_dim,
            policy.act_dim,
            policy.kl_targ,
            restore_flag=True,
        )
        load_saved_model(policy, ".policy", "0000000A", "1111111A")

        val_func = NNValueFunction(val_func.obs_dim, restore_flag=True)
        load_saved_model(val_func, ".val_func", "2222222A", "3333333A")

        # The scaler file holds a pickled (scaler, episode) pair.
        # NOTE: pickle.load can execute arbitrary code -- only restore
        # checkpoints that come from a trusted source.
        with open(prefix + ".scaler", 'rb') as scaler_file:
            scaler, episode = pickle.load(scaler_file)

        print("FINISHED RESTORE")
        return (policy, val_func, scaler, episode)