示例#1
0
 def __init__(self, state_dim, action_dim, hdim):
     """Create the ``Q_head`` network.

     Args:
         state_dim: Added to ``action_dim`` to form the first entry of the
             dims list handed to ``MLP``.
         action_dim: Added to ``state_dim`` (see above).
         hdim: Iterable whose items are unpacked between the first entry
             and the trailing ``1`` of the dims list.
     """
     super().__init__()
     # dims = [state_dim + action_dim, <every item of hdim>, 1]
     q_dims = [state_dim + action_dim, *hdim, 1]
     self.Q_head = MLP(dims=q_dims)
示例#2
0
 def __init__(self, state_dim, hdim):
     """Create the ``value_head`` network.

     Args:
         state_dim: First entry of the dims list handed to ``MLP``.
         hdim: Iterable whose items are unpacked between ``state_dim`` and
             the trailing ``1`` of the dims list.
     """
     super().__init__()
     # dims = [state_dim, <every item of hdim>, 1]
     value_dims = [state_dim, *hdim, 1]
     self.value_head = MLP(dims=value_dims)
示例#3
0
 def __init__(self, state_dim, hdim, action_dim):
     """Create the encoder/decoder pair and mark the policy as non-discrete.

     Args:
         state_dim: First entry of the encoder's dims list.
         hdim: Sequence unpacked after ``state_dim`` in the encoder's dims
             list. Its last element is also the decoder's first argument,
             so it must be indexable and non-empty.
         action_dim: Second argument handed to ``BetaReluParams``.
     """
     super().__init__()
     encoder_dims = [state_dim, *hdim]
     self.encoder = MLP(dims=encoder_dims)
     # The decoder receives the final entry of hdim as its first argument.
     last_hidden = hdim[-1]
     self.decoder = BetaReluParams(last_hidden, action_dim)
     self.discrete = False
示例#4
0
 def __init__(self, state_dim, hdim, action_dim):
     """Create the encoder/decoder pair.

     Args:
         state_dim: First entry of the encoder's dims list.
         hdim: Sequence unpacked after ``state_dim`` in the encoder's dims
             list. Its last element is also the decoder's first argument,
             so it must be indexable and non-empty.
         action_dim: Second argument handed to ``BetaSoftPlusParams``.
     """
     # NOTE(review): the SimpleBetaReluPolicy constructor in this file also
     # sets ``self.discrete = False``; this one does not -- confirm whether
     # the attribute is supplied elsewhere or is missing here.
     super().__init__()
     encoder_dims = [state_dim, *hdim]
     self.encoder = MLP(dims=encoder_dims)
     # The decoder receives the final entry of hdim as its first argument.
     last_hidden = hdim[-1]
     self.decoder = BetaSoftPlusParams(last_hidden, action_dim)
示例#5
0
 def __init__(self, state_dim, action_dim):
     """Create a ``CNN`` encoder followed by an ``MLP`` decoder.

     Args:
         state_dim: Iterable unpacked into the positional arguments of
             ``CNN``.
         action_dim: Last entry of the decoder's dims list.
     """
     super().__init__()
     self.encoder = CNN(*state_dim)
     # The decoder's first dims entry is whatever size the encoder reports.
     embedding_size = self.encoder.image_embedding_size
     decoder_dims = [embedding_size, action_dim]
     self.decoder = MLP(dims=decoder_dims)
示例#6
0
 def __init__(self, state_dim, hdim, action_dim):
     """Create the ``bid_mu`` and ``bid_logstd`` networks.

     Both networks are built from the same dims layout,
     ``[state_dim, *hdim, action_dim]``, and with ``zero_init=True``.

     Args:
         state_dim: First entry of each dims list.
         hdim: Iterable unpacked between ``state_dim`` and ``action_dim``;
             it is unpacked once per network.
         action_dim: Last entry of each dims list.
     """
     # NOTE(review): ``zero_init`` presumably zero-initialises part of the
     # MLP -- confirm against the MLP implementation.
     super().__init__()
     # Each network gets its own freshly built dims list (nothing shared).
     mu_dims = [state_dim, *hdim, action_dim]
     self.bid_mu = MLP(dims=mu_dims, zero_init=True)
     logstd_dims = [state_dim, *hdim, action_dim]
     self.bid_logstd = MLP(dims=logstd_dims, zero_init=True)
     self.discrete = False
示例#7
0
 def __init__(self, state_dim, hdim, action_dim):
     """Create the single ``network`` used by this policy.

     Args:
         state_dim: First entry of the dims list handed to ``MLP``.
         hdim: Iterable whose items are unpacked between ``state_dim`` and
             ``action_dim`` in the dims list.
         action_dim: Last entry of the dims list.
     """
     super().__init__()
     # dims = [state_dim, <every item of hdim>, action_dim]
     network_dims = [state_dim, *hdim, action_dim]
     self.network = MLP(dims=network_dims)