def __init__(self, state_dim, action_dim):
    """Select and build the encoder/decoder pair from the observation shape.

    Args:
        state_dim: Observation shape. Must compare equal to one of the
            supported tuples ``(1, 64, 64)``, ``(7, 7, 3)`` or
            ``(11, 8, 8)``.
        action_dim: Passed straight to ``MNIST_CNN`` in the ``(1, 64, 64)``
            case; otherwise used as the last entry of the decoder ``MLP``
            ``dims`` list.

    Raises:
        AssertionError: If ``state_dim`` matches none of the supported
            shapes.
    """
    super(DiscreteCNNPolicy, self).__init__()
    self.state_dim = state_dim
    self.action_dim = action_dim

    if self.state_dim == (1, 64, 64):
        # The encoder is constructed with action_dim, so the decoder is
        # just an identity pass-through.
        self.encoder = MNIST_CNN(self.action_dim)
        self.decoder = lambda x: x
    elif self.state_dim == (7, 7, 3):
        # Forward every entry except the last one, i.e. (7, 7).
        # NOTE(review): presumably a channels-last layout -- confirm.
        self.encoder = MinigridCNN(*state_dim[:-1])
        self.decoder = MLP(
            dims=[self.encoder.image_embedding_size, action_dim])
    elif self.state_dim == (11, 8, 8):
        # Forward every entry except the first one, i.e. (8, 8).
        # NOTE(review): presumably a channels-first layout -- confirm.
        self.encoder = BoxPushCNN(*state_dim[1:])
        self.decoder = MLP(
            dims=[self.encoder.image_embedding_size, action_dim])
    else:
        # Raise explicitly instead of `assert False`: assert statements are
        # removed under `python -O`, which would leave the object without
        # encoder/decoder attributes and fail much later.
        raise AssertionError(
            "Unsupported state_dim {!r}; expected one of "
            "(1, 64, 64), (7, 7, 3), (11, 8, 8)".format(state_dim))
def __init__(self, state_dim, hdim, action_dim):
    """Create the encoder MLP and the ``BetaMeanParams`` decoder.

    Args:
        state_dim: First entry of the encoder's ``dims`` list.
        hdim: Iterable of sizes appended after ``state_dim``; it must also
            support ``hdim[-1]``, which is handed to the decoder.
        action_dim: Second argument to ``BetaMeanParams``.
    """
    super(SimpleBetaMeanPolicy, self).__init__()

    # Encoder sizes: the state size followed by each entry of hdim.
    encoder_dims = [state_dim] + list(hdim)
    self.encoder = MLP(dims=encoder_dims)

    # The decoder is built from the last hdim entry and the action size.
    last_hidden = hdim[-1]
    self.decoder = BetaMeanParams(last_hidden, action_dim)
def __init__(self, state_dim, hdim, action_dim):
    """Record the sizes and build a single MLP as ``self.network``.

    Args:
        state_dim: First entry of the MLP ``dims`` list; also stored on
            the instance.
        hdim: Iterable of sizes placed between ``state_dim`` and
            ``action_dim`` in ``dims``.
        action_dim: Last entry of the MLP ``dims`` list; also stored on
            the instance.
    """
    super(DiscretePolicy, self).__init__()
    self.state_dim = state_dim
    self.action_dim = action_dim

    # Full layer-size list: state size, hdim entries, then action size.
    layer_dims = [state_dim] + list(hdim) + [action_dim]
    self.network = MLP(dims=layer_dims)
def __init__(self, state_dim, hdim, action_dim):
    """Create the encoder MLP and the ``GaussianParams`` decoder.

    Args:
        state_dim: First entry of the encoder's ``dims`` list.
        hdim: Iterable of sizes appended after ``state_dim``; it must also
            support ``hdim[-1]``, which is handed to the decoder.
        action_dim: Second argument to ``GaussianParams``.
    """
    super(IsotropicGaussianPolicy, self).__init__()

    # Encoder sizes: the state size followed by each entry of hdim.
    encoder_dims = [state_dim] + list(hdim)
    self.encoder = MLP(dims=encoder_dims)

    # The decoder is built from the last hdim entry and the action size.
    last_hidden = hdim[-1]
    self.decoder = GaussianParams(last_hidden, action_dim)
def __init__(self, state_dim, hdim):
    """Build ``self.value_net``, an MLP whose final ``dims`` entry is 1.

    Args:
        state_dim: First entry of the MLP ``dims`` list.
        hdim: Iterable of sizes placed between ``state_dim`` and the
            trailing ``1`` in ``dims``.
    """
    super(SimpleValueFn, self).__init__()

    # Layer sizes: state size, hdim entries, then a single final unit.
    layer_dims = [state_dim] + list(hdim) + [1]
    self.value_net = MLP(dims=layer_dims)