Exemplo n.º 1
0
    def __init__(self, shape, mean=0.0, log_stddev=0.0, scope='gaussian', summary_labels=()):
        """
        Gaussian distribution.

        Args:
            shape: Action shape.
            mean: Optional distribution bias for the mean; used as the bias
                of the 'mean' linear layer.
            log_stddev: Optional distribution bias for the log standard
                deviation; used as the bias of the 'log-stddev' linear layer.
            scope: Scope name forwarded to the parent constructor.
            summary_labels: Summary labels forwarded to both linear layers
                and to the parent constructor.
        """
        self.shape = shape

        # Both parameter layers share their size and summary labels; only
        # the bias and the scope name differ.
        shared_kwargs = dict(
            size=util.prod(self.shape),
            summary_labels=summary_labels
        )
        self.mean = Linear(bias=mean, scope='mean', **shared_kwargs)
        self.log_stddev = Linear(bias=log_stddev, scope='log-stddev', **shared_kwargs)

        super(Gaussian, self).__init__(
            shape=shape,
            scope=scope,
            summary_labels=summary_labels
        )
Exemplo n.º 2
0
    def __init__(self, shape, min_value, max_value, alpha=0.0, beta=0.0, scope='beta', summary_labels=()):
        """
        Beta distribution.

        Args:
            shape: Action shape.
            min_value: Minimum value of continuous actions.
            max_value: Maximum value of continuous actions.
            alpha: Optional distribution bias for the alpha value.
            beta: Optional distribution bias for the beta value.
            scope: Scope name forwarded to the parent constructor.
            summary_labels: Summary labels forwarded to the parent constructor
                (not passed to the linear layers created here).
        """
        # The bounds are only compared when a lower bound is given.
        if min_value is not None:
            assert max_value > min_value

        self.shape = shape
        self.min_value, self.max_value = min_value, max_value

        num_outputs = util.prod(self.shape)
        self.alpha = Linear(
            size=num_outputs,
            bias=alpha,
            scope='alpha'
        )
        self.beta = Linear(
            size=num_outputs,
            bias=beta,
            scope='beta'
        )

        super(Beta, self).__init__(
            shape=shape,
            scope=scope,
            summary_labels=summary_labels
        )
Exemplo n.º 3
0
    def __init__(self, shape, min_value, max_value, alpha=0.0, beta=0.0, scope='beta', summary_labels=()):
        """
        Beta distribution used for continuous actions. The min and max values
        given here are stored on the instance so that action values can be
        bounded.

        Args:
            shape: Shape of actions
            min_value: Min value of all actions for the given shape
            max_value: Max value of all actions for the given shape
            alpha: Bias of the linear layer producing the alpha
                concentration parameter
            beta: Bias of the linear layer producing the beta
                concentration parameter
            scope: Scope name, passed positionally to the parent constructor
            summary_labels: Summary labels, passed positionally to the parent
                constructor
        """
        # Skipped entirely when no lower bound is supplied.
        assert min_value is None or max_value > min_value

        self.min_value, self.max_value = min_value, max_value
        self.shape = shape

        # Identical layers apart from bias and scope name.
        layer_size = util.prod(self.shape)
        self.alpha = Linear(bias=alpha, scope='alpha', size=layer_size)
        self.beta = Linear(bias=beta, scope='beta', size=layer_size)

        super(Beta, self).__init__(scope, summary_labels)
Exemplo n.º 4
0
    def initialize(self, custom_getter):
        """
        Runs the parent initialization, then creates one state-value layer and
        one l-entries layer per action in `self.actions_spec`.

        Args:
            custom_getter: Forwarded unchanged to the parent `initialize`.
        """
        super(QNAFModel, self).initialize(custom_getter)

        self.state_values = {}
        self.l_entries = {}
        # NOTE(review): every action uses the same two scope strings; confirm
        # this is intended when several actions are present.
        for action_name, action_spec in self.actions_spec.items():
            n = util.prod(action_spec['shape'])
            # n * (n - 1) / 2 -- presumably the off-diagonal entries of a
            # triangular n x n matrix; confirm against the consumer.
            num_l_entries = n * (n - 1) // 2
            self.state_values[action_name] = Linear(size=n, scope='state-value')
            self.l_entries[action_name] = Linear(size=num_l_entries, scope='l-entries')
Exemplo n.º 5
0
    def setup_components_and_tf_funcs(self, custom_getter=None):
        """
        Runs the parent setup, then creates one state-value layer and one
        l-entries layer per action in `self.actions_spec`.

        Args:
            custom_getter: Forwarded unchanged to the parent implementation.
        """
        super(QNAFModel, self).setup_components_and_tf_funcs(custom_getter)

        self.state_values = {}
        self.l_entries = {}
        # NOTE(review): every action uses the same two scope strings; confirm
        # this is intended when several actions are present.
        for action_name, action_spec in self.actions_spec.items():
            n = util.prod(action_spec['shape'])
            # n * (n - 1) / 2 -- presumably the off-diagonal entries of a
            # triangular n x n matrix; confirm against the consumer.
            num_l_entries = n * (n - 1) // 2
            self.state_values[action_name] = Linear(size=n, scope='state-value')
            self.l_entries[action_name] = Linear(size=num_l_entries, scope='l-entries')
Exemplo n.º 6
0
    def __init__(self, shape, mean=0.0, log_stddev=0.0, scope='gaussian', summary_labels=()):
        """
        Gaussian distribution.

        Args:
            shape: Action shape.
            mean: Bias of the 'mean' linear layer.
            log_stddev: Bias of the 'log-stddev' linear layer.
            scope: Name scope under which both layers are created; also
                passed positionally to the parent constructor.
            summary_labels: Passed positionally to the parent constructor.
        """
        self.shape = shape
        num_outputs = util.prod(self.shape)

        # Create both parameter layers inside the distribution's name scope.
        with tf.name_scope(name=scope):
            self.mean = Linear(
                size=num_outputs,
                bias=mean,
                scope='mean'
            )
            self.log_stddev = Linear(
                size=num_outputs,
                bias=log_stddev,
                scope='log-stddev'
            )

        super(Gaussian, self).__init__(scope, summary_labels)
Exemplo n.º 7
0
    def __init__(self, shape, num_actions, probabilities=None, scope='categorical', summary_labels=()):
        """
        Categorical distribution.

        Args:
            shape: Action shape.
            num_actions: Number of discrete action alternatives.
            probabilities: Optional distribution bias. When given, the log of
                each probability is used as bias, repeated once per action
                component; when omitted the bias is 0.0.
            scope: Scope name forwarded to the parent constructor.
            summary_labels: Summary labels forwarded to the parent constructor.
        """
        self.num_actions = num_actions

        if probabilities is not None:
            # Repeat the per-alternative log-probabilities for every action
            # component so the bias lines up with the flat logits layer.
            logits = []
            for _ in range(util.prod(shape)):
                logits.extend(log(prob) for prob in probabilities)
        else:
            logits = 0.0

        # One logit per (action component, alternative) pair.
        num_logits = util.prod(shape) * self.num_actions
        self.logits = Linear(size=num_logits, bias=logits, scope='logits')

        super(Categorical, self).__init__(
            shape=shape,
            scope=scope,
            summary_labels=summary_labels
        )
Exemplo n.º 8
0
    def __init__(self, shape, probability=0.5, scope='bernoulli', summary_labels=()):
        """
        Bernoulli distribution.

        Args:
            shape: Action shape.
            probability: Optional distribution bias; its logarithm is used as
                the bias of the 'logit' linear layer.
            scope: Name scope under which the layer is created; also passed
                positionally to the parent constructor.
            summary_labels: Passed positionally to the parent constructor.
        """
        self.shape = shape
        num_outputs = util.prod(self.shape)

        with tf.name_scope(name=scope):
            # NOTE(review): the bias is log(p), not the log-odds
            # log(p / (1 - p)); confirm this matches how the logit is
            # transformed downstream.
            self.logit = Linear(
                size=num_outputs,
                bias=log(probability),
                scope='logit'
            )

        super(Bernoulli, self).__init__(scope, summary_labels)
Exemplo n.º 9
0
    def __init__(self, states_spec, actions_spec, network_spec, config):
        """
        Validates the action specification, creates the per-action linear
        layers under `config.scope`, then calls the parent constructor.

        Args:
            states_spec: Forwarded to the parent constructor.
            actions_spec: Dict of action name -> action spec dict; each spec
                must have type 'float' and neither 'min_value' nor 'max_value'.
            network_spec: Forwarded to the parent constructor.
            config: Forwarded to the parent constructor; `config.scope` names
                the scope in which the layers are created.

        Raises:
            TensorForceError: If any action is not an unconstrained float action.
        """
        # Reject the first action that is non-float or bounded.
        for action in actions_spec.values():
            if action['type'] != 'float' or 'min_value' in action or 'max_value' in action:
                raise TensorForceError("Only unconstrained float actions valid for NAFModel.")

        with tf.name_scope(name=config.scope):
            self.state_values = {}
            self.l_entries = {}
            for action_name, action_spec in actions_spec.items():
                n = util.prod(action_spec['shape'])
                # NOTE(review): 'state-value' is appended without the '-'
                # separator that '-l-entries' uses; kept as-is because scope
                # names are runtime-visible. Confirm before changing.
                self.state_values[action_name] = Linear(
                    size=n,
                    scope=(action_name + 'state-value')
                )
                self.l_entries[action_name] = Linear(
                    size=(n * (n - 1) // 2),
                    scope=(action_name + '-l-entries')
                )

        super(QNAFModel, self).__init__(
            states_spec=states_spec,
            actions_spec=actions_spec,
            network_spec=network_spec,
            config=config
        )
Exemplo n.º 10
0
    def __init__(self, shape, num_actions, probabilities=None, scope='categorical', summary_labels=()):
        """
        Categorical distribution.

        Args:
            shape: Action shape.
            num_actions: Number of discrete action alternatives.
            probabilities: Optional distribution bias. When given, the log of
                each probability is used as bias, repeated once per action
                component; when omitted the bias is 0.0.
            scope: Scope name, passed positionally to the parent constructor.
            summary_labels: Passed positionally to the parent constructor.
        """
        self.shape = shape
        self.num_actions = num_actions

        # Either a scalar zero bias, or the log-probabilities tiled once per
        # action component.
        logits = 0.0 if probabilities is None else [
            log(prob)
            for _ in range(util.prod(shape))
            for prob in probabilities
        ]

        # One logit per (action component, alternative) pair.
        num_logits = util.prod(self.shape) * self.num_actions
        self.logits = Linear(
            size=num_logits,
            bias=logits,
            scope='logits'
        )

        super(Categorical, self).__init__(scope, summary_labels)
Exemplo n.º 11
0
    def __init__(self, network_spec, scope='network-baseline', summary_labels=()):
        """
        Network baseline.

        Args:
            network_spec: Network specification dict
            scope: Name scope under which the network and the prediction
                layer are created; also passed to the parent constructor
            summary_labels: Passed positionally to the parent constructor
        """
        with tf.name_scope(name=scope):
            network = Network.from_spec(spec=network_spec)
            self.network = network
            # Only networks without internal inputs are accepted here.
            assert len(network.internal_inputs()) == 0

            # Single-unit linear layer on top of the network output.
            self.linear = Linear(
                size=1,
                bias=0.0,
                scope='prediction'
            )

        super(NetworkBaseline, self).__init__(scope, summary_labels)
Exemplo n.º 12
0
    def __init__(self, scope='ddpg-critic-network', summary_labels=(), size_t0=400, size_t1=300):
        """
        Builds the critic network as a fixed stack of layers: two
        linear / batch-normalization / relu groups followed by a single-unit
        tanh dense layer.

        Args:
            scope: Scope name forwarded to the parent constructor.
            summary_labels: Summary labels forwarded to the parent constructor.
            size_t0: Size of the first linear layer.
            size_t1: Size of the second linear layer.
        """
        super(DDPGCriticNetwork, self).__init__(scope=scope, summary_labels=summary_labels)

        # Settings shared by both batch-normalization layers.
        batchnorm_kwargs = dict(layer='batch_normalization', center=True, scale=True)

        # First group.
        self.t0l = Linear(size=size_t0, scope='linear0')
        self.t0b = TFLayer(scope='batchnorm0', **batchnorm_kwargs)
        self.t0n = Nonlinearity(name='relu', scope='relu0')

        # Second group.
        self.t1l = Linear(size=size_t1, scope='linear1')
        self.t1b = TFLayer(scope='batchnorm1', **batchnorm_kwargs)
        self.t1n = Nonlinearity(name='relu', scope='relu1')

        # Output layer with weights drawn uniformly from [-3e-3, 3e-3].
        output_weights = tf.random_uniform_initializer(minval=-3e-3, maxval=3e-3)
        self.t2d = Dense(size=1, activation='tanh', scope='dense0', weights=output_weights)

        # Register the layers in the same order they were created.
        layers_in_order = (
            self.t0l, self.t0b, self.t0n,
            self.t1l, self.t1b, self.t1n,
            self.t2d,
        )
        for layer in layers_in_order:
            self.add_layer(layer)
Exemplo n.º 13
0
    def __init__(self, network, scope='network-baseline', summary_labels=()):
        """
        Network baseline.

        Args:
            network: Network specification, passed as `spec` to
                `Network.from_spec`
            scope: Scope name forwarded to the parent constructor
            summary_labels: Summary labels forwarded to the network and to
                the parent constructor
        """
        network_kwargs = {'summary_labels': summary_labels}
        self.network = Network.from_spec(spec=network, kwargs=network_kwargs)
        # Only networks with an empty internals spec are accepted here.
        num_internals = len(self.network.internals_spec())
        assert num_internals == 0

        # Single-unit linear layer on top of the network output.
        self.linear = Linear(
            size=1,
            bias=0.0,
            scope='prediction'
        )

        super(NetworkBaseline, self).__init__(
            scope=scope,
            summary_labels=summary_labels
        )
Exemplo n.º 14
0
    def __init__(self, baselines, scope='aggregated-baseline', summary_labels=()):
        """
        Aggregated baseline.

        Args:
            baselines: Dict of per-state baseline specification dicts
            scope: Scope name, passed positionally to the parent constructor
            summary_labels: Summary labels forwarded to every sub-baseline,
                to the prediction layer and to the parent constructor
        """
        self.baselines = {}
        # Sub-baselines are created in sorted key order.
        for state_name in sorted(baselines):
            baseline_kwargs = {'summary_labels': summary_labels}
            self.baselines[state_name] = Baseline.from_spec(
                spec=baselines[state_name],
                kwargs=baseline_kwargs
            )

        # Single-unit linear layer producing the aggregated prediction.
        self.linear = Linear(
            size=1,
            bias=0.0,
            scope='prediction',
            summary_labels=summary_labels
        )

        super(AggregatedBaseline, self).__init__(scope, summary_labels)
Exemplo n.º 15
0
    def __init__(self, baselines, scope='aggregated-baseline', summary_labels=()):
        """
        Aggregated baseline.

        Args:
            baselines: Dict of per-state baseline specification dicts
            scope: Name scope under which all components are created; also
                passed positionally to the parent constructor
            summary_labels: Summary labels forwarded to every sub-baseline
                and to the parent constructor
        """
        with tf.name_scope(name=scope):
            self.baselines = {}
            for state_name in baselines:
                # Each sub-baseline gets its own '<name>-baseline' name scope.
                sub_scope = state_name + '-baseline'
                with tf.name_scope(name=sub_scope):
                    baseline_kwargs = {'summary_labels': summary_labels}
                    self.baselines[state_name] = Baseline.from_spec(
                        spec=baselines[state_name],
                        kwargs=baseline_kwargs
                    )

            # Single-unit linear layer producing the aggregated prediction.
            self.linear = Linear(scope='prediction', size=1, bias=0.0)

        super(AggregatedBaseline, self).__init__(scope, summary_labels)
Exemplo n.º 16
0
    def __init__(self, shape, probability=0.5, scope='bernoulli', summary_labels=()):
        """
        Bernoulli distribution.

        Args:
            shape: Action shape.
            probability: Optional distribution bias; its logarithm is used as
                the bias of the 'logit' linear layer.
            scope: Scope name forwarded to the parent constructor.
            summary_labels: Summary labels forwarded to the parent constructor.
        """
        self.shape = shape
        num_outputs = util.prod(self.shape)

        # NOTE(review): the bias is log(p), not the log-odds
        # log(p / (1 - p)); confirm this matches how the logit is
        # transformed downstream.
        self.logit = Linear(size=num_outputs, bias=log(probability), scope='logit')

        super(Bernoulli, self).__init__(
            shape=shape,
            scope=scope,
            summary_labels=summary_labels
        )