# gpflow/TensorFlow variant.
import numpy as np
import gpflow
from tensorflow.python import debug as tf_debug

def __init__(self, X, Y, num_induced_points=None, controller=None,
             reward=None, m_init=None, S_init=None, name=None, debug=False):
    super(PILCO, self).__init__(name)
    if not num_induced_points:
        # Full multi-output GP dynamics model.
        self.mgpr = MGPR(X, Y)
    else:
        # Sparse variant with the requested number of inducing points.
        self.mgpr = SMGPR(X, Y, num_induced_points)
    self.state_dim = Y.shape[1]
    self.control_dim = X.shape[1] - Y.shape[1]

    self.sess = gpflow.get_default_session()
    if debug:
        self.sess = tf_debug.LocalCLIDebugWrapperSession(self.sess)

    if controller is None:
        # A policy must be supplied; fail fast instead of printing and
        # leaving self.controller unset.
        raise ValueError("controller cannot be None")
    self.controller = controller

    if reward is None:
        self.reward = Reward()
    else:
        self.reward = reward

    if m_init is None or S_init is None:
        # If the user has not provided an initial state for the rollouts,
        # default to the first state in the dataset, with variance 0.1.
        self.m_init = X[0:1, 0:self.state_dim]
        self.S_init = np.diag(np.ones(self.state_dim) * 0.1)
    else:
        self.m_init = m_init
        self.S_init = S_init
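
# Usage sketch (illustrative, not from the source): constructing this
# gpflow variant with the controller/reward classes referenced by the
# other snippets. Shapes and names below are assumptions.
import numpy as np

state_dim, control_dim, n = 2, 1, 50
X = np.random.rand(n, state_dim + control_dim)   # [state, control] training inputs
Y = np.random.rand(n, state_dim)                 # next-state targets

controller = LinearController(state_dim, control_dim)
reward = ExponentialReward(state_dim)

pilco = PILCO(X, Y, controller=controller, reward=reward)
# Passing num_induced_points switches the dynamics model to the sparse SMGPR:
sparse_pilco = PILCO(X, Y, num_induced_points=20,
                     controller=controller, reward=reward)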

# PyTorch variant: fixed LinearController / ExponentialReward, Adam optimizer.
import numpy as np
import torch

def __init__(self, X, Y, horizon=30, m_init=None, S_init=None):
    self.mgpr = MGPR(X, Y)
    self.state_dim = Y.shape[1]
    self.control_dim = X.shape[1] - Y.shape[1]
    self.horizon = horizon
    self.controller = LinearController(self.state_dim, self.control_dim)
    self.reward = ExponentialReward(self.state_dim)

    if m_init is None or S_init is None:
        # Default initial state for the rollouts: the first state in the
        # dataset, with an isotropic variance of 0.1.
        self.m_init = X[0:1, 0:self.state_dim]
        self.S_init = np.diag(np.ones(self.state_dim) * 0.1)
    else:
        self.m_init = m_init
        self.S_init = S_init
    self.m_init = torch.tensor(self.m_init, dtype=torch.float32)
    self.S_init = torch.tensor(self.S_init, dtype=torch.float32)

    self.optimizer = torch.optim.Adam(self.controller.parameters())
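
# Sketch of a policy-improvement loop driven by the Adam optimizer stored
# above. `compute_expected_return` is a hypothetical method name standing in
# for whatever rolls the GP dynamics model forward over self.horizon; only
# the optimizer wiring is taken from the constructor.
pilco = PILCO(X, Y, horizon=30)
for step in range(100):
    pilco.optimizer.zero_grad()
    # Maximize expected return by minimizing its negation (assumption).
    loss = -pilco.compute_expected_return(pilco.m_init, pilco.S_init)
    loss.backward()
    pilco.optimizer.step()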

# Variant with a configurable horizon and a lazily created optimizer.
def __init__(self, X, Y, num_induced_points=None, horizon=100, controller=None,
             reward=None, m_init=None, S_init=None, name=None):
    super(PILCO, self).__init__(name)
    if not num_induced_points:
        self.mgpr = MGPR(X, Y)
    else:
        # Honor num_induced_points instead of silently ignoring it.
        self.mgpr = SMGPR(X, Y, num_induced_points)
    self.state_dim = Y.shape[1]
    self.control_dim = X.shape[1] - Y.shape[1]
    self.horizon = horizon
    self.controller = controller
    self.reward = reward
    if m_init is None or S_init is None:
        # Fall back to the first state in the dataset, as in the other
        # variants, rather than discarding a user-supplied initial state.
        self.m_init = X[0:1, 0:self.state_dim]
        self.S_init = np.diag(np.ones(self.state_dim) * 0.1)
    else:
        self.m_init = m_init
        self.S_init = S_init
    # Created on first use by the optimization routine.
    self.optimizer = None
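
# Sketch showing an explicit initial rollout state with this variant,
# assuming the data conventions above: m_init is a 1 x state_dim mean and
# S_init a state_dim x state_dim covariance.
m_init = np.zeros((1, state_dim))
S_init = 0.01 * np.eye(state_dim)
pilco = PILCO(X, Y, horizon=100, controller=controller, reward=reward,
              m_init=m_init, S_init=S_init)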