Exemplo n.º 1
0
    def act(self, *, states, horizons, internals, auxiliaries, independent,
            return_internals):
        """Select actions, using each distribution's mode when independent.

        When ``independent`` is falsy the call is forwarded unchanged to
        ``Stochastic.act``. Otherwise the output of ``self.network.apply`` is
        used to parametrize every entry of ``self.distributions``, and the
        value of ``distribution.mode(...)`` is collected per name into a
        ``TensorDict``.

        Args:
            states: Passed to ``self.network.apply`` as ``x``.
            horizons: Forwarded to the network.
            internals: Forwarded to the network; on the mode-based path it is
                replaced by the internals the network returns when
                ``return_internals`` is truthy.
            auxiliaries: Queried by distribution name (with ``TensorDict()``
                supplied as the default) to obtain the ``conditions`` given
                to ``parametrize``.
            independent: Chooses between the mode-based path and
                ``Stochastic.act``.
            return_internals: Whether internals are returned next to actions.

        Returns:
            On the mode-based path, ``actions`` alone or the pair
            ``(actions, internals)`` when ``return_internals`` is truthy;
            otherwise whatever ``Stochastic.act`` returns.
        """
        # TODO: the mode-based path below runs if independent
        # (or temp constant 0.0).
        if not independent:
            return Stochastic.act(self=self,
                                  states=states,
                                  horizons=horizons,
                                  internals=internals,
                                  auxiliaries=auxiliaries,
                                  independent=independent,
                                  return_internals=return_internals)

        network_output = self.network.apply(
            x=states,
            horizons=horizons,
            internals=internals,
            independent=independent,
            return_internals=return_internals,
        )
        # With return_internals the network yields (embedding, internals).
        if return_internals:
            embedding, internals = network_output
        else:
            embedding = network_output

        def mode_of(name, distribution):
            # Parametrize the distribution on the embedding and pick its mode.
            params = distribution.parametrize(
                x=embedding,
                conditions=auxiliaries.get(name, default=TensorDict()),
            )
            return distribution.mode(parameters=params)

        actions = self.distributions.fmap(
            function=mode_of, cls=TensorDict, with_names=True
        )

        return (actions, internals) if return_internals else actions
Exemplo n.º 2
0
 def tf_act(self, states, internals, auxiliaries, return_internals):
     """Delegate action selection to ``Stochastic.tf_act``.

     Every argument is forwarded by keyword without modification, and the
     delegate's result is returned as-is.
     """
     delegate = Stochastic.tf_act
     return delegate(
         self=self,
         states=states,
         internals=internals,
         auxiliaries=auxiliaries,
         return_internals=return_internals,
     )