def __init__(self, state_dim, action_dim, hdim):
    """Create the single MLP head stored as ``self.Q_head``.

    Args:
        state_dim: Added to ``action_dim`` to form the first entry of the
            MLP ``dims`` list.
        action_dim: Added to ``state_dim`` (see above).
        hdim: Iterable of sizes unpacked between the first entry and the
            final entry, which is fixed at 1.
    """
    super(SimpleQFn, self).__init__()
    input_width = state_dim + action_dim
    layer_dims = [input_width, *hdim, 1]
    self.Q_head = MLP(dims=layer_dims)
def __init__(self, state_dim, hdim):
    """Create the single MLP head stored as ``self.value_head``.

    Args:
        state_dim: First entry of the MLP ``dims`` list.
        hdim: Iterable of sizes unpacked between ``state_dim`` and the
            final entry, which is fixed at 1.
    """
    super(SimpleValueFn, self).__init__()
    layer_dims = [state_dim, *hdim, 1]
    self.value_head = MLP(dims=layer_dims)
def __init__(self, state_dim, hdim, action_dim):
    """Create the encoder MLP and the ``BetaReluParams`` decoder.

    Args:
        state_dim: First entry of the encoder's ``dims`` list.
        hdim: Sequence of sizes following ``state_dim`` in the encoder's
            ``dims``; its last element is also passed to the decoder, so
            it must support ``hdim[-1]``.
        action_dim: Second positional argument to ``BetaReluParams``.
    """
    super(SimpleBetaReluPolicy, self).__init__()

    encoder_dims = [state_dim, *hdim]
    self.encoder = MLP(dims=encoder_dims)

    # The decoder is sized from the last entry of hdim.
    last_hidden = hdim[-1]
    self.decoder = BetaReluParams(last_hidden, action_dim)

    self.discrete = False
def __init__(self, state_dim, hdim, action_dim):
    """Create the encoder MLP and the ``BetaSoftPlusParams`` decoder.

    Args:
        state_dim: First entry of the encoder's ``dims`` list.
        hdim: Sequence of sizes following ``state_dim`` in the encoder's
            ``dims``; its last element is also passed to the decoder, so
            it must support ``hdim[-1]``.
        action_dim: Second positional argument to ``BetaSoftPlusParams``.
    """
    super(SimpleBetaSoftPlusPolicy, self).__init__()

    encoder_dims = [state_dim, *hdim]
    self.encoder = MLP(dims=encoder_dims)

    # The decoder is sized from the last entry of hdim.
    last_hidden = hdim[-1]
    self.decoder = BetaSoftPlusParams(last_hidden, action_dim)
    # NOTE(review): SimpleBetaReluPolicy.__init__ also sets
    # ``self.discrete = False``; this constructor does not. Confirm whether
    # the omission is intentional before relying on ``.discrete`` here.
def __init__(self, state_dim, action_dim):
    """Create a CNN encoder followed by an MLP decoder.

    Args:
        state_dim: Iterable unpacked as the positional arguments to
            ``CNN`` (presumably an image shape -- confirm against CNN).
        action_dim: Final entry of the decoder's ``dims`` list.
    """
    super(DiscreteCNNPolicy, self).__init__()

    self.encoder = CNN(*state_dim)

    # The decoder's first size is whatever the encoder reports.
    embedding_width = self.encoder.image_embedding_size
    decoder_dims = [embedding_width, action_dim]
    self.decoder = MLP(dims=decoder_dims)
def __init__(self, state_dim, hdim, action_dim):
    """Create two identically shaped MLP heads: ``bid_mu`` and ``bid_logstd``.

    Both heads are constructed with ``zero_init=True``.

    Args:
        state_dim: First entry of each head's ``dims`` list.
        hdim: Iterable of sizes unpacked between ``state_dim`` and
            ``action_dim`` in each head's ``dims``.
        action_dim: Final entry of each head's ``dims`` list.
    """
    super(BidPolicyLN, self).__init__()

    # Each head gets its own freshly built dims list, so the two MLPs never
    # share a list object.
    mu_dims = [state_dim, *hdim, action_dim]
    self.bid_mu = MLP(dims=mu_dims, zero_init=True)

    logstd_dims = [state_dim, *hdim, action_dim]
    self.bid_logstd = MLP(dims=logstd_dims, zero_init=True)

    self.discrete = False
def __init__(self, state_dim, hdim, action_dim):
    """Create the single MLP stored as ``self.network``.

    Args:
        state_dim: First entry of the MLP ``dims`` list.
        hdim: Iterable of sizes unpacked between ``state_dim`` and
            ``action_dim``.
        action_dim: Final entry of the MLP ``dims`` list.
    """
    super(DiscretePolicy, self).__init__()
    layer_dims = [state_dim, *hdim, action_dim]
    self.network = MLP(dims=layer_dims)