def act(self, *, states, horizons, internals, auxiliaries, independent, return_internals):
    """Compute actions, using each distribution's mode when ``independent`` is set.

    If ``independent`` is falsy, the call is forwarded unchanged to
    ``Stochastic.act``. Otherwise the network is applied to ``states`` to
    obtain an embedding, every entry of ``self.distributions`` is
    parametrized on that embedding (conditioned on the matching entry of
    ``auxiliaries``, or an empty ``TensorDict`` when there is none), and the
    mode of each distribution is taken as its action.

    Args:
        states: Passed to ``self.network.apply`` as ``x``.
        horizons: Forwarded to the network / ``Stochastic.act``.
        internals: Forwarded to the network / ``Stochastic.act``.
        auxiliaries: Per-distribution conditions, looked up by name.
        independent: Selects the mode-based path when truthy.
        return_internals: When truthy, the network output is unpacked as an
            ``(embedding, internals)`` pair and the internals are returned
            alongside the actions.

    Returns:
        The result of ``self.distributions.fmap(..., cls=TensorDict)``, or a
        ``(actions, internals)`` tuple when ``return_internals`` is truthy;
        on the non-independent path, whatever ``Stochastic.act`` returns.
    """
    # Guard clause: the non-independent case is handled entirely by the parent.
    if not independent:
        return Stochastic.act(
            self=self,
            states=states,
            horizons=horizons,
            internals=internals,
            auxiliaries=auxiliaries,
            independent=independent,
            return_internals=return_internals,
        )

    # TODO: or temp constant 0.0
    network_output = self.network.apply(
        x=states,
        horizons=horizons,
        internals=internals,
        independent=independent,
        return_internals=return_internals,
    )
    if return_internals:
        # The network hands back a pair when internals are requested.
        embedding, network_internals = network_output
    else:
        embedding = network_output

    def mode_action(name, distribution):
        # Distributions without an auxiliary entry get an empty TensorDict.
        conditions = auxiliaries.get(name, default=TensorDict())
        parameters = distribution.parametrize(x=embedding, conditions=conditions)
        return distribution.mode(parameters=parameters)

    actions = self.distributions.fmap(
        function=mode_action, cls=TensorDict, with_names=True
    )

    return (actions, network_internals) if return_internals else actions
def tf_act(self, states, internals, auxiliaries, return_internals):
    """Delegate to ``Stochastic.tf_act``, forwarding every argument unchanged.

    Args:
        states: Forwarded as ``states``.
        internals: Forwarded as ``internals``.
        auxiliaries: Forwarded as ``auxiliaries``.
        return_internals: Forwarded as ``return_internals``.

    Returns:
        Whatever ``Stochastic.tf_act`` returns for these arguments.
    """
    result = Stochastic.tf_act(
        self=self,
        states=states,
        internals=internals,
        auxiliaries=auxiliaries,
        return_internals=return_internals,
    )
    return result