示例#1
0
    def __init__(self, state_dim, action_dim):
        """Select the encoder/decoder pair that matches ``state_dim``.

        Args:
            state_dim: Observation shape. Must equal one of the supported
                shapes ``(1, 64, 64)``, ``(7, 7, 3)`` or ``(11, 8, 8)``.
                A list holding the same entries is accepted as well.
            action_dim: Passed straight to ``MNIST_CNN`` for the
                ``(1, 64, 64)`` shape; otherwise used as the final entry of
                the ``dims`` list given to the ``MLP`` decoder.

        Raises:
            ValueError: If ``state_dim`` is not one of the supported shapes.
        """
        super(DiscreteCNNPolicy, self).__init__()
        self.state_dim = state_dim
        self.action_dim = action_dim

        # A list never compares equal to a tuple, so normalise list input
        # before matching; the stored attribute keeps the caller's value.
        shape = tuple(state_dim) if isinstance(state_dim, list) else state_dim

        if shape == (1, 64, 64):
            # MNIST_CNN is built from action_dim itself, so the decoder only
            # passes its input through unchanged.
            self.encoder = MNIST_CNN(self.action_dim)
            self.decoder = lambda x: x
        elif shape == (7, 7, 3):
            # Every entry except the last is forwarded to the encoder.
            self.encoder = MinigridCNN(*state_dim[:-1])
            self.decoder = MLP(
                dims=[self.encoder.image_embedding_size, action_dim])
        elif shape == (11, 8, 8):
            # Every entry except the first is forwarded to the encoder.
            self.encoder = BoxPushCNN(*state_dim[1:])
            self.decoder = MLP(
                dims=[self.encoder.image_embedding_size, action_dim])
        else:
            # An explicit raise (instead of `assert False`) still fires when
            # Python runs with -O, and tells the caller what went wrong.
            raise ValueError(
                "Unsupported state_dim {!r}; expected one of (1, 64, 64), "
                "(7, 7, 3) or (11, 8, 8)".format(state_dim))
示例#2
0
 def __init__(self, state_dim, hdim, action_dim):
     """Create the MLP encoder and the BetaMeanParams decoder.

     The encoder receives a ``dims`` list made of ``state_dim`` followed by
     every entry of ``hdim``. The decoder is constructed from the last
     entry of ``hdim`` and ``action_dim``.
     """
     super().__init__()

     encoder_dims = [state_dim]
     encoder_dims.extend(hdim)
     self.encoder = MLP(dims=encoder_dims)

     self.decoder = BetaMeanParams(hdim[-1], action_dim)
示例#3
0
 def __init__(self, state_dim, hdim, action_dim):
     """Record the state/action sizes and build a single MLP.

     The MLP receives a ``dims`` list that starts with ``state_dim``,
     continues with every entry of ``hdim`` and ends with ``action_dim``.
     """
     super().__init__()
     self.state_dim, self.action_dim = state_dim, action_dim

     network_dims = [state_dim] + list(hdim) + [action_dim]
     self.network = MLP(dims=network_dims)
示例#4
0
 def __init__(self, state_dim, hdim, action_dim):
     """Create the MLP encoder and the GaussianParams decoder.

     The encoder's ``dims`` list is ``state_dim`` followed by the entries
     of ``hdim``. The decoder is constructed from the last entry of
     ``hdim`` and ``action_dim``.
     """
     super().__init__()

     encoder_dims = [state_dim] + list(hdim)
     self.encoder = MLP(dims=encoder_dims)

     self.decoder = GaussianParams(hdim[-1], action_dim)
示例#5
0
 def __init__(self, state_dim, hdim):
     """Build the value network as a single MLP.

     The MLP's ``dims`` list starts with ``state_dim``, continues with the
     entries of ``hdim`` and ends with ``1``.
     """
     super().__init__()

     value_dims = [state_dim]
     value_dims.extend(hdim)
     value_dims.append(1)
     self.value_net = MLP(dims=value_dims)