Exemplo n.º 1
0
    def __init__(self,
                 l2_regularization=0.0,
                 l1_regularization=0.0,
                 scope='eieeoutput',
                 summary_labels=()):
        """
        Set up the output layer with a fixed convolution configuration.

        The filter count (1), stride (1), padding ('VALID'), bias (enabled)
        and activation ('relu') are hard-coded here and cannot be changed
        through the constructor.

        Args:
            l2_regularization: L2 regularization weight
            l1_regularization: L1 regularization weight
            scope: Scope name forwarded to the parent constructor
            summary_labels: Summary labels forwarded to the nonlinearity and
                to the parent constructor
        """
        # Fixed convolution settings.
        self.size = self.stride = 1
        self.padding = 'VALID'
        self.bias = True

        # Regularization weights supplied by the caller.
        self.l2_regularization = l2_regularization
        self.l1_regularization = l1_regularization

        # The nonlinearity is created before the parent constructor runs,
        # exactly as the parent initialisation is the last step.
        self.nonlinearity = Nonlinearity(summary_labels=summary_labels,
                                         name='relu')
        super(EIIE_OutPut, self).__init__(summary_labels=summary_labels,
                                          scope=scope)
Exemplo n.º 2
0
 def __init__(self,
              size=20,
              bias=True,
              activation='relu',
              l2_regularization=0.0,
              l1_regularization=0.0,
              scope='EIIE',
              summary_labels=()):
     """
     Create the three convolutions and two nonlinearities of the layer.

     Args:
         size: Number of filters of the second convolution
         bias: Forwarded as `bias` to every Conv2d
         activation: Name passed to both Nonlinearity instances
         l2_regularization: L2 regularization weight, forwarded to every Conv2d
         l1_regularization: L1 regularization weight, forwarded to every Conv2d
         scope: Scope name forwarded to the parent constructor
         summary_labels: Summary labels forwarded to all sub-layers and to
             the parent constructor
     """
     self.size = size

     # Keyword arguments common to all three convolutions.
     shared = dict(bias=bias,
                   l2_regularization=l2_regularization,
                   l1_regularization=l1_regularization,
                   summary_labels=summary_labels)

     self.conv1 = Conv2d(size=3,
                         stride=(1, 1),
                         window=(1, 3),
                         padding='VALID',
                         **shared)

     # NOTE(review): window_length is not defined in this method; it is
     # presumably a module-level name -- confirm.
     span = window_length - 2 - 1
     self.conv2 = Conv2d(size=size,
                         stride=(1, span),
                         window=(1, span),
                         padding='VALID',
                         **shared)

     # No explicit padding here: Conv2d's own default is used.
     self.conv3 = Conv2d(size=1, stride=(1, 1), window=(1, 1), **shared)

     nonlinearity_args = dict(name=activation, summary_labels=summary_labels)
     self.nonlinearity = Nonlinearity(**nonlinearity_args)
     self.nonlinearity2 = Nonlinearity(**nonlinearity_args)

     super(EIIE, self).__init__(summary_labels=summary_labels, scope=scope)
Exemplo n.º 3
0
    def __init__(self, scope='ddpg-critic-network', summary_labels=(), size_t0=400, size_t1=300):
        """
        Build the critic: two linear/batch-norm/ReLU stages and a dense output.

        Args:
            scope: Scope name forwarded to the parent constructor
            summary_labels: Summary labels forwarded to the parent constructor
            size_t0: Size of the first linear layer
            size_t1: Size of the second linear layer
        """
        super(DDPGCriticNetwork, self).__init__(summary_labels=summary_labels, scope=scope)

        # Settings shared by both batch-normalization layers.
        batchnorm_args = dict(layer='batch_normalization', center=True, scale=True)

        # First stage.
        self.t0l = Linear(scope='linear0', size=size_t0)
        self.t0b = TFLayer(scope='batchnorm0', **batchnorm_args)
        self.t0n = Nonlinearity(scope='relu0', name='relu')

        # Second stage.
        self.t1l = Linear(scope='linear1', size=size_t1)
        self.t1b = TFLayer(scope='batchnorm1', **batchnorm_args)
        self.t1n = Nonlinearity(scope='relu1', name='relu')

        # Single-unit tanh output with a small uniform weight initializer.
        output_init = tf.random_uniform_initializer(minval=-3e-3, maxval=3e-3)
        self.t2d = Dense(size=1, activation='tanh', scope='dense0', weights=output_init)

        # Register every layer with the network, in creation order.
        for layer in (self.t0l, self.t0b, self.t0n,
                      self.t1l, self.t1b, self.t1n,
                      self.t2d):
            self.add_layer(layer)
Exemplo n.º 4
0
class EIIE(Layer):
    """
    EIIE layer.

    Splits the input into its leading entry along the last two axes (``w0``)
    and the remaining data, runs the data through two convolutions,
    concatenates ``w0`` back on the last axis, applies a final single-filter
    convolution and prepends an all-zero "cash bias" entry along axis 1.
    """
    def __init__(self,
                 size=20,
                 bias=True,
                 activation='relu',
                 l2_regularization=0.0,
                 l1_regularization=0.0,
                 scope='EIIE',
                 summary_labels=()):
        """
        Create the three convolutions and two nonlinearities of the layer.

        Args:
            size: Number of filters of the second convolution
            bias: Forwarded as `bias` to every Conv2d
            activation: Name passed to both Nonlinearity instances
            l2_regularization: L2 regularization weight, forwarded to every Conv2d
            l1_regularization: L1 regularization weight, forwarded to every Conv2d
            scope: Scope name forwarded to the parent constructor
            summary_labels: Summary labels forwarded to all sub-layers and to
                the parent constructor
        """
        self.size = size
        self.conv1 = Conv2d(size=3,
                            bias=bias,
                            stride=(1, 1),
                            window=(1, 3),
                            padding='VALID',
                            l2_regularization=l2_regularization,
                            l1_regularization=l1_regularization,
                            summary_labels=summary_labels)
        # NOTE(review): window_length is not defined in this class; it is
        # presumably a module-level name -- confirm.
        self.conv2 = Conv2d(size=size,
                            bias=bias,
                            stride=(1, window_length - 2 - 1),
                            window=(1, window_length - 2 - 1),
                            padding='VALID',
                            l2_regularization=l2_regularization,
                            l1_regularization=l1_regularization,
                            summary_labels=summary_labels)
        # No explicit padding here: Conv2d's own default is used.
        self.conv3 = Conv2d(size=1,
                            bias=bias,
                            stride=(1, 1),
                            window=(1, 1),
                            l2_regularization=l2_regularization,
                            l1_regularization=l1_regularization,
                            summary_labels=summary_labels)
        # NOTE: both nonlinearities are constructed but tf_apply currently
        # does not apply them.
        self.nonlinearity = Nonlinearity(name=activation,
                                         summary_labels=summary_labels)
        self.nonlinearity2 = Nonlinearity(name=activation,
                                          summary_labels=summary_labels)
        super(EIIE, self).__init__(scope=scope, summary_labels=summary_labels)

    def tf_apply(self, x0, update):
        """
        Build the layer output for input ``x0``.

        Args:
            x0: Input tensor, indexed here with four axes
            update: Flag forwarded to the `apply` call of every convolution

        Returns:
            The output of the third convolution with one all-zero entry
            prepended along axis 1.
        """
        # Shape walk-through from the original author's example
        # (window size 50, 3 rows on axis 1, 5 data columns):
        #   x0     = (None, 3, 50, 5)
        #   w0     = (None, 3, 1, 1)
        #   x      = (None, 3, 49, 5)
        #   conv1 => (None, 3, 47, 3)
        #   conv2 => (None, 3, 1, 20)
        #   concat=> (None, 3, 1, 21)
        #   conv3 => (None, 3, 1, 1)
        #   concat=> (None, 4, 1, 1)   # presumably; axis 1 grows by one
        w0 = x0[:, :, :1, :1]
        x = x0[:, :, 1:, :]

        x = self.conv1.apply(x, update=update)
        x = self.conv2.apply(x, update=update)

        # Re-attach the leading entry as an extra channel.
        x = tf.concat([x, w0], 3)
        x = self.conv3.apply(x, update=update)

        # Cash bias: a zero entry with a batch-sized shape, obtained by
        # zeroing a slice of x so the batch dimension need not be known.
        cash_bias = x[:, :1, :1, :1] * 0
        x = tf.concat([cash_bias, x], 1)

        if 'activations' in self.summary_labels:
            summary = tf.summary.histogram(name='activations', values=x)
            self.summaries.append(summary)

        return x

    def tf_regularization_loss(self):
        """
        Sum the regularization losses of this layer and its convolutions.

        Each loss is requested exactly once, and a loss is only appended
        after checking that same loss is not None.

        Returns:
            The summed loss tensor, or None if no component reports a loss.
        """
        losses = list()

        own_loss = super(EIIE, self).tf_regularization_loss()
        if own_loss is not None:
            losses.append(own_loss)

        for conv in (self.conv1, self.conv2, self.conv3):
            conv_loss = conv.regularization_loss()
            if conv_loss is not None:
                losses.append(conv_loss)

        if len(losses) > 0:
            return tf.add_n(inputs=losses)
        else:
            return None

    def get_variables(self, include_non_trainable=False):
        """
        Collect the variables of this layer and all of its sub-layers.

        Args:
            include_non_trainable: Forwarded to every `get_variables` call

        Returns:
            A new list with the parent's variables followed by those of
            conv1, conv2, conv3, nonlinearity and nonlinearity2.
        """
        # Copy so the list returned by the parent is never mutated.
        layer_variables = list(super(EIIE, self).get_variables(
            include_non_trainable=include_non_trainable))

        sublayers = (self.conv1, self.conv2, self.conv3,
                     self.nonlinearity, self.nonlinearity2)
        for sublayer in sublayers:
            layer_variables.extend(sublayer.get_variables(
                include_non_trainable=include_non_trainable))

        return layer_variables
Exemplo n.º 5
0
class DDPGCriticNetwork(LayerBasedNetwork):
    """
    Critic network for DDPG.

    The state input passes through a linear/batch-norm/ReLU stage, is
    concatenated with the action input, passes through a second such stage
    and ends in a single-unit tanh dense layer.
    """
    def __init__(self,
                 scope='ddpg-critic-network',
                 summary_labels=(),
                 size_t0=400,
                 size_t1=300):
        """
        Create and register all layers of the critic.

        Args:
            scope: Scope name forwarded to the parent constructor
            summary_labels: Summary labels forwarded to the parent constructor
            size_t0: Size of the first linear layer
            size_t1: Size of the second linear layer
        """
        super(DDPGCriticNetwork, self).__init__(scope=scope,
                                                summary_labels=summary_labels)

        self.t0l = Linear(size=size_t0, scope='linear0')
        self.t0b = TFLayer(layer='batch_normalization',
                           scope='batchnorm0',
                           center=True,
                           scale=True)
        self.t0n = Nonlinearity(name='relu', scope='relu0')

        self.t1l = Linear(size=size_t1, scope='linear1')
        self.t1b = TFLayer(layer='batch_normalization',
                           scope='batchnorm1',
                           center=True,
                           scale=True)
        self.t1n = Nonlinearity(name='relu', scope='relu1')

        self.t2d = Dense(size=1,
                         activation='tanh',
                         scope='dense0',
                         weights=tf.random_uniform_initializer(minval=-3e-3,
                                                               maxval=3e-3))

        self.add_layer(self.t0l)
        self.add_layer(self.t0b)
        self.add_layer(self.t0n)

        self.add_layer(self.t1l)
        self.add_layer(self.t1b)
        self.add_layer(self.t1n)

        self.add_layer(self.t2d)

    @staticmethod
    def _single_input(x, key, label):
        """
        Return the single input stored under ``x[key]``.

        Args:
            x: Mapping holding the network inputs
            key: Entry to read ('states' or 'actions')
            label: Singular name used in the error message

        Returns:
            ``x[key]`` itself, or its only value if it is a dict.

        Raises:
            TensorForceError: if the entry is missing or None, or if it is
                a dict that does not contain exactly one element.
        """
        if key not in x or x[key] is None:
            raise TensorForceError(
                'DDPG critic network requires a {} input, but none given.'
                .format(label))

        value = x[key]
        if not isinstance(value, dict):
            return value

        if len(value) != 1:
            raise TensorForceError(
                'DDPG critic network must have only one {} input, but {} given.'
                .format(label, len(value)))
        return value[next(iter(sorted(value)))]

    def tf_apply(self, x, internals, update, return_internals=False):
        """
        Build the critic output for the given states and actions.

        Args:
            x: Mapping with a 'states' and an 'actions' entry; each entry is
                either used directly or is a dict with exactly one element
            internals: Not used by this network
            update: Flag forwarded to the `apply` call of every layer
            return_internals: If true, a pair ``(output, None)`` is returned

        Returns:
            The output of the final dense layer, or ``(output, None)`` when
            `return_internals` is true.

        Raises:
            TensorForceError: if states or actions are missing, or hold
                more or fewer than one input.
        """
        # Explicit validation of BOTH inputs. The previous
        # `assert x['states'], x['actions']` only tested the states (the
        # actions were merely the assertion message) and vanished under -O.
        x_states = self._single_input(x, 'states', 'state')
        x_actions = self._single_input(x, 'actions', 'action')

        out = self.t0l.apply(x=x_states, update=update)
        out = self.t0b.apply(x=out, update=update)
        out = self.t0n.apply(x=out, update=update)

        # The actions join the network after the first stage, concatenated
        # along axis 1.
        out = self.t1l.apply(x=tf.concat([out, x_actions], axis=1),
                             update=update)
        out = self.t1b.apply(x=out, update=update)
        out = self.t1n.apply(x=out, update=update)

        out = self.t2d.apply(x=out, update=update)

        # NOTE: the output is returned as produced by the dense layer; it is
        # intentionally not squeezed.

        if return_internals:
            # Todo: Internals management
            return out, None
        else:
            return out
Exemplo n.º 6
0
class EIIE_OutPut(Layer):
    """
    EIIE output layer, modelled on a 2-dimensional convolutional layer.

    A single filter whose window spans the whole third input axis is
    convolved over the input, then a bias is added and a ReLU nonlinearity
    is applied.

    NOTE(review): the original description mentions a second `last_w` input
    (the last weights coming from the environment), but tf_apply only
    receives `x` -- confirm the intended interface.
    """
    def __init__(self,
                 l2_regularization=0.0,
                 l1_regularization=0.0,
                 scope='eieeoutput',
                 summary_labels=()):
        """
        Set up the output layer with a fixed convolution configuration.

        The filter count (1), stride (1), padding ('VALID'), bias (enabled)
        and activation ('relu') are hard-coded here; the window is derived
        from the input in tf_apply.

        Args:
            l2_regularization: L2 regularization weight
            l1_regularization: L1 regularization weight
            scope: Scope name forwarded to the parent constructor
            summary_labels: Summary labels forwarded to the nonlinearity and
                to the parent constructor
        """
        # Fixed convolution settings.
        self.size = self.stride = 1
        self.padding = 'VALID'
        self.bias = True

        # Regularization weights supplied by the caller.
        self.l2_regularization = l2_regularization
        self.l1_regularization = l1_regularization

        self.nonlinearity = Nonlinearity(summary_labels=summary_labels,
                                         name='relu')
        super(EIIE_OutPut, self).__init__(summary_labels=summary_labels,
                                          scope=scope)

    def tf_apply(self, x, update):
        """
        Build the convolution, bias and nonlinearity for input ``x``.

        Args:
            x: Input tensor; must have rank 4
            update: Flag forwarded to the nonlinearity's `apply`

        Returns:
            The tensor produced by the nonlinearity.

        Raises:
            TensorForceError: if the input rank is not 4.
        """
        input_rank = util.rank(x)
        if input_rank != 4:
            raise TensorForceError(
                'Invalid input rank for conv2d layer: {}, must be 4'.format(
                    input_rank))

        # The window covers the full extent of axis 2 of the input.
        self.window = (1, x.shape[2])
        kernel_shape = self.window + (x.shape[3].value, self.size)
        self.filters = tf.get_variable(
            name='W',
            shape=kernel_shape,
            dtype=tf.float32,
            initializer=tf.random_normal_initializer(
                mean=0.0,
                stddev=min(0.1, sqrt(2.0 / self.size)),
                dtype=tf.float32))

        # A scalar stride is used for both spatial axes.
        if type(self.stride) is tuple:
            stride_h, stride_w = self.stride
        else:
            stride_h = stride_w = self.stride

        x = tf.nn.conv2d(input=x,
                         filter=self.filters,
                         strides=(1, stride_h, stride_w, 1),
                         padding=self.padding)

        if self.bias:
            # self.bias starts as a flag and is replaced by the variable.
            self.bias = tf.get_variable(
                name='b',
                shape=(self.size, ),
                dtype=tf.float32,
                initializer=tf.zeros_initializer(dtype=tf.float32))
            x = tf.nn.bias_add(value=x, bias=self.bias)

        x = self.nonlinearity.apply(x=x, update=update)

        if 'activations' in self.summary_labels:
            self.summaries.append(
                tf.summary.histogram(name='activations', values=x))

        return x

    def tf_regularization_loss(self):
        """
        Sum the regularization losses of the parent, the filters and bias,
        and the nonlinearity.

        Returns:
            The summed loss tensor, or None if there is no loss term.
        """
        def regularized_tensors():
            # Filters first, then the bias (when it is not None).
            yield self.filters
            if self.bias is not None:
                yield self.bias

        losses = []

        parent_loss = super(EIIE_OutPut, self).tf_regularization_loss()
        if parent_loss is not None:
            losses.append(parent_loss)

        if self.l2_regularization > 0.0:
            for tensor in regularized_tensors():
                losses.append(self.l2_regularization *
                              tf.nn.l2_loss(t=tensor))

        if self.l1_regularization > 0.0:
            for tensor in regularized_tensors():
                losses.append(self.l1_regularization *
                              tf.reduce_sum(input_tensor=tf.abs(x=tensor)))

        activation_loss = self.nonlinearity.regularization_loss()
        if activation_loss is not None:
            losses.append(activation_loss)

        if not losses:
            return None
        return tf.add_n(inputs=losses)

    def get_variables(self, include_non_trainable=False):
        """
        Return the parent's variables followed by the nonlinearity's.

        Args:
            include_non_trainable: Forwarded to both `get_variables` calls
        """
        own = super(EIIE_OutPut, self).get_variables(
            include_non_trainable=include_non_trainable)
        activation = self.nonlinearity.get_variables(
            include_non_trainable=include_non_trainable)
        return own + activation

    def get_summaries(self):
        """Return the parent's summaries followed by the nonlinearity's."""
        own = super(EIIE_OutPut, self).get_summaries()
        activation = self.nonlinearity.get_summaries()
        return own + activation