Example #1
    def __init__(self, X, Y, num_induced_points=None, controller=None,
                 reward=None, m_init=None, S_init=None, name=None, debug=False):
        # super(PILCO, self).__init__(name)
        if not num_induced_points:      # no inducing points given: use the full (non-sparse) GP dynamics model
            self.mgpr = MGPR(X, Y)
        else:
            self.mgpr = SMGPR(X, Y, num_induced_points)
        self.state_dim = Y.shape[1]
        self.control_dim = X.shape[1] - Y.shape[1]

        self.sess = gpflow.get_default_session()
        if debug:
            self.sess = tf_debug.LocalCLIDebugWrapperSession(self.sess)
        # self.sess.run(tf.global_variables_initializer())

        if controller is None:    # a policy/controller must be supplied by the caller
            raise ValueError("controller cannot be None")
        self.controller = controller

        if reward is None:     # reward function
            self.reward = Reward()
        else:
            self.reward = reward
        
        if m_init is None or S_init is None:
            # If the user has not provided an initial state for the rollouts,
            # then define it as the first state in the dataset.
            self.m_init = X[0:1, 0:self.state_dim]
            self.S_init = np.diag(np.ones(self.state_dim) * 0.1)  # variance
        else:
            self.m_init = m_init
            self.S_init = S_init
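
As a quick orientation for the variant above, the sketch below only illustrates the expected array layout: each row of X stacks state and control, while Y holds the corresponding state targets, which is how the constructor recovers state_dim and control_dim. The shapes and the commented-out instantiation are illustrative assumptions, not part of the original code.

import numpy as np

# Illustrative shapes only: 5 state dimensions, 1 control dimension, 100 transitions.
state_dim, control_dim, n = 5, 1, 100
X = np.random.randn(n, state_dim + control_dim)   # rows are [state, control]
Y = np.random.randn(n, state_dim)                 # corresponding state targets

# Hypothetical instantiation; `controller` must be a policy object from the
# surrounding code base, since this constructor provides no default for it.
# pilco = PILCO(X, Y, controller=controller, reward=Reward())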
Example #2
    def __init__(self, X, Y, horizon=30, m_init=None, S_init=None):
        self.mgpr = MGPR(X, Y)

        self.state_dim = Y.shape[1]
        self.control_dim = X.shape[1] - Y.shape[1]
        self.horizon = horizon

        self.controller = LinearController(self.state_dim, self.control_dim)
        self.reward = ExponentialReward(self.state_dim)

        if m_init is None or S_init is None:
            # default initial state for the rollouts is the first state in the dataset.
            self.m_init = X[0:1, 0:self.state_dim]
            self.S_init = np.diag(np.ones(self.state_dim) * 0.1)
        else:
            self.m_init = m_init
            self.S_init = S_init
        self.m_init = torch.tensor(self.m_init, dtype=torch.float32)
        self.S_init = torch.tensor(self.S_init, dtype=torch.float32)
        self.optimizer = torch.optim.Adam(self.controller.parameters())
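
The constructor above only creates the Adam optimizer; the snippet below is a hedged sketch of how such an optimizer is typically driven. `predicted_return` is a hypothetical method name standing in for whatever routine propagates (m_init, S_init) through the GP dynamics model for `horizon` steps and returns the expected cumulative reward as a differentiable tensor.

def optimize_policy(pilco, n_steps=100):
    # Gradient ascent on the expected return of the current controller.
    # `pilco.predicted_return()` is an assumed method, not shown in the example.
    for _ in range(n_steps):
        pilco.optimizer.zero_grad()
        loss = -pilco.predicted_return()   # maximize return = minimize its negative
        loss.backward()
        pilco.optimizer.step()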
Example #3
    def __init__(self,
                 X,
                 Y,
                 num_induced_points=None,
                 horizon=100,
                 controller=None,
                 reward=None,
                 m_init=None,
                 S_init=None,
                 name=None):
        super(PILCO, self).__init__(name)
        # num_induced_points is accepted for interface compatibility but not used here.
        self.mgpr = MGPR(X, Y)
        self.state_dim = Y.shape[1]
        self.control_dim = X.shape[1] - Y.shape[1]
        self.horizon = horizon
        self.controller = controller
        self.reward = reward
        if m_init is None or S_init is None:
            # Default initial state for the rollouts: the first state in the dataset.
            self.m_init = X[0:1, 0:self.state_dim]
            self.S_init = np.diag(np.ones(self.state_dim) * 0.1)
        else:
            self.m_init = m_init
            self.S_init = S_init
        self.optimizer = None
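
All three constructors fall back to the same default initial-state distribution: the mean is the state part of the first recorded row of X, and the covariance is an isotropic diagonal with variance 0.1. The toy values below just reproduce those two lines in isolation.

import numpy as np

state_dim = 3
X = np.arange(8.0).reshape(2, 4)             # 2 rows of [state (3 dims), control (1 dim)]

m_init = X[0:1, 0:state_dim]                 # -> [[0. 1. 2.]]
S_init = np.diag(np.ones(state_dim) * 0.1)   # -> 3x3 diagonal matrix with 0.1 on the diagonal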