Example #1
def inference_ref2(input, is_train, use_bnorm=False):
    with tf.name_scope('conv1'):
        x = tf.layers.conv2d(
            inputs=input,
            filters=32,
            kernel_size=3,
            padding="same",
            activation=tf.nn.relu,
            use_bias=not use_bnorm,
            kernel_initializer=init_ops.glorot_normal_initializer())
        if use_bnorm:
            x = tf.layers.batch_normalization(inputs=x, training=is_train)
        x = tf.layers.conv2d(
            inputs=x,
            filters=32,
            kernel_size=3,
            padding="same",
            activation=tf.nn.relu,
            use_bias=not use_bnorm,
            kernel_initializer=init_ops.glorot_normal_initializer())
        if use_bnorm:
            x = tf.layers.batch_normalization(inputs=x, training=is_train)
        x = tf.layers.max_pooling2d(inputs=x, pool_size=2, strides=2)

    with tf.name_scope('conv2'):
        x = tf.layers.conv2d(
            inputs=x,
            filters=64,
            kernel_size=3,
            padding="same",
            activation=tf.nn.relu,
            use_bias=not use_bnorm,
            kernel_initializer=init_ops.glorot_normal_initializer())
        if use_bnorm:
            x = tf.layers.batch_normalization(inputs=x, training=is_train)
        x = tf.layers.conv2d(
            inputs=x,
            filters=64,
            kernel_size=3,
            padding="same",
            activation=tf.nn.relu,
            use_bias=not use_bnorm,
            kernel_initializer=init_ops.glorot_normal_initializer())
        if use_bnorm:
            x = tf.layers.batch_normalization(inputs=x, training=is_train)
        x = tf.layers.max_pooling2d(inputs=x, pool_size=2, strides=2)

    with tf.name_scope('dense1'):
        # x = tf.reshape(x, [-1, np.prod(x.get_shape()[1:])])
        x = tf.reshape(x, [x.get_shape()[0].value, -1])
        x = tf.layers.dense(inputs=x, units=512, activation=tf.nn.relu)

    with tf.name_scope('dense2'):
        x = tf.layers.dense(inputs=x, units=cifar10.NB_CLASSES)

    return x
Example #2
 def test_glorot_normal_initializer(self):
     tensor_shape = (5, 6, 4, 2)
     with self.cached_session():
         fan_in, fan_out = init_ops._compute_fans(tensor_shape)
         std = np.sqrt(2. / (fan_in + fan_out))
         self._runner(init_ops.glorot_normal_initializer(seed=123),
                      tensor_shape,
                      target_mean=0.,
                      target_std=std)
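For the kernel shape (5, 6, 4, 2) used above, TF's fan computation treats the first two dimensions as the receptive field, so fan_in = 5*6*4 = 120 and fan_out = 5*6*2 = 60, giving a target std of sqrt(2/180) ≈ 0.105. A minimal standalone sketch of that arithmetic (NumPy only; variable names are illustrative):

import numpy as np

# Glorot/Xavier normal std for a 4-D conv kernel of shape (kh, kw, c_in, c_out).
kh, kw, c_in, c_out = 5, 6, 4, 2
receptive_field = kh * kw                 # 30
fan_in = receptive_field * c_in           # 120
fan_out = receptive_field * c_out         # 60
std = np.sqrt(2. / (fan_in + fan_out))    # ~0.1054, the target_std checked above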
Example #3
  def __init__(self, num_units, depth, forget_bias=1.0,
               state_is_tuple=True, use_peepholes=False,
               activation=None, gate_activation=None,
               cell_activation=None,
               initializer=None,
               input_gate_initializer=None,
               use_bias=True, reuse=None, name=None):
    """Initialize the basic NLSTM cell.

    Args:
      num_units: `int`, The number of hidden units of each cell state
        and hidden state.
      depth: `int`, The number of layers in the nest.
      forget_bias: `float`, The bias added to forget gates.
      state_is_tuple: If `True`, accepted and returned states are tuples of
        the `h_state` and `c_state`s.  If `False`, they are concatenated
        along the column axis.  The latter behavior will soon be deprecated.
      use_peepholes: `bool`(optional).
      activation: Activation function of the update values,
        including new inputs and new cell states.  Default: `tanh`.
      gate_activation: Activation function of the gates,
        including the input, output, and forget gate. Default: `sigmoid`.
      cell_activation: Activation function of the first cell gate. Default: `identity`.
        Note that in the paper only the first cell_activation is identity.
      initializer: Initializer of kernel. Default: `orthogonal_initializer`.
      input_gate_initializer: Initializer of input gates.
        Default: `glorot_normal_initializer`.
      use_bias: `bool`. Default: `True`.
      reuse: `bool`(optional) Python boolean describing whether to reuse variables
        in an existing scope.  If not `True`, and the existing scope already has
        the given variables, an error is raised.
      name: `str`, the name of the layer. Layers with the same name will
        share weights, but to avoid mistakes we require reuse=True in such
        cases.
    """
    super(NLSTMCell, self).__init__(_reuse=reuse, name=name)
    if not state_is_tuple:
      logging.warn("%s: Using a concatenated state is slower and will soon be "
                   "deprecated.  Use state_is_tuple=True.", self)

    # Inputs must be 2-dimensional.
    self.input_spec = base_layer.InputSpec(ndim=2)
    self._num_units = num_units
    self._forget_bias = forget_bias
    self._state_is_tuple = state_is_tuple
    self._use_peepholes = use_peepholes
    self._depth = depth
    self._activation = activation or math_ops.tanh
    self._gate_activation = gate_activation or math_ops.sigmoid
    self._cell_activation = cell_activation or array_ops.identity
    self._initializer = initializer or init_ops.orthogonal_initializer()
    self._input_gate_initializer = (input_gate_initializer 
                                    or init_ops.glorot_normal_initializer())
    self._use_bias = use_bias
    self._kernels = None
    self._biases = None
    self.built = False
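A hypothetical usage sketch for the constructor above (the `inputs` tensor and the batch size are placeholders, not part of the original snippet):

# Build a 2-level nested LSTM cell with 64 units per state.
cell = NLSTMCell(num_units=64, depth=2)
# With state_is_tuple=True (default), the state is a tuple of one h_state
# and `depth` c_states, each of shape [batch_size, num_units].
state = cell.zero_state(batch_size=32, dtype=tf.float32)
output, state = cell(inputs, state)  # inputs: a [32, input_dim] placeholder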
Example #4
 def test_glorot_normal_initializer(self):
   tensor_shape = (5, 6, 4, 2)
   with self.cached_session():
     fan_in, fan_out = init_ops._compute_fans(tensor_shape)
     std = np.sqrt(2. / (fan_in + fan_out))
     self._runner(
         init_ops.glorot_normal_initializer(seed=123),
         tensor_shape,
         target_mean=0.,
         target_std=std)
Example #5
 def suanet_v2_arg_scope(_weight_decay=weight_decay):
     with arg_scope([layers.conv2d, layers_lib.fully_connected],
                    activation_fn=nn_ops.relu,
                    biases_initializer=init_ops.glorot_normal_initializer(),
                    weights_regularizer=regularizers.l2_regularizer(
                        _weight_decay)):
         with arg_scope([layers.conv2d], padding='SAME'):
             with arg_scope([layers_lib.max_pool2d],
                            padding='SAME') as arg_sc:
                 return arg_sc
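The returned scope is meant to be re-entered around the network definition so that every conv/fully-connected call inherits these defaults; a hedged sketch (`images` is an assumed input tensor):

with arg_scope(suanet_v2_arg_scope(_weight_decay=0.0005)):
    # conv2d calls made here inherit relu activation, the Glorot-normal
    # bias initializer, the L2 weight regularizer, and SAME padding.
    net = layers.conv2d(images, 64, [3, 3], scope='conv1')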
Example #6
def clone_model_fn(features, labels, mode):
    """Model function for DN."""

    # Input Layer
    input_layer = tf.convert_to_tensor(features['x'])

    hidden_layer1 = tf.layers.dense(
        inputs=input_layer,
        activation=tf.nn.tanh,
        kernel_initializer=init_ops.glorot_uniform_initializer(),
        bias_initializer=init_ops.glorot_normal_initializer(),
        units=64)
    drop1 = tf.layers.dropout(inputs=hidden_layer1, rate=0.2)

    output_layer = tf.layers.dense(
        inputs=drop1,
        activation=tf.nn.tanh,
        bias_initializer=init_ops.glorot_uniform_initializer(),
        units=17)
    predictions = {
        # Generate predictions (for PREDICT and EVAL mode)
        "actions": output_layer,
        # Add `softmax_tensor` to the graph. It is used for PREDICT and by the
        # `logging_hook`.
        "probabilities": tf.nn.softmax(output_layer, name="softmax_tensor"),
    }

    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    # Calculate Loss (for both TRAIN and EVAL modes)
    loss = tf.losses.mean_squared_error(labels=labels,
                                        predictions=output_layer)

    # Configure the Training Op (for TRAIN mode)
    if mode == tf.estimator.ModeKeys.TRAIN:
        optimizer = tf.train.ProximalAdagradOptimizer(learning_rate=0.01)
        train_op = optimizer.minimize(loss=loss,
                                      global_step=tf.train.get_global_step())
        return tf.estimator.EstimatorSpec(mode=mode,
                                          loss=loss,
                                          train_op=train_op)

    # Add evaluation metrics (for EVAL mode)
    eval_metric_ops = {
        "mse":
        tf.metrics.mean_squared_error(labels=labels,
                                      predictions=predictions["actions"])
    }
    return tf.estimator.EstimatorSpec(mode=mode,
                                      loss=loss,
                                      eval_metric_ops=eval_metric_ops)
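A model_fn like this is typically wrapped in a tf.estimator.Estimator; a minimal hedged usage sketch (the feature/label arrays and model_dir are placeholders, not from the snippet):

estimator = tf.estimator.Estimator(model_fn=clone_model_fn, model_dir='/tmp/clone_dn')
train_input_fn = tf.estimator.inputs.numpy_input_fn(
    x={'x': x_train},   # float32 features, keyed 'x' as the model_fn expects
    y=y_train,          # float32 targets with 17 columns (matches the output layer)
    batch_size=64,
    num_epochs=None,
    shuffle=True)
estimator.train(input_fn=train_input_fn, steps=1000)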
Example #7
def MADE_layer(inputs, units, mask, name, activation):

    kernel_initializer = init_ops.glorot_normal_initializer()

    def masked_initializer(
            shape=(units, inputs), dtype=None, partition_info=None):
        return mask.T * kernel_initializer(shape, dtype, partition_info)

    with ops.name_scope(name, "masked_dense", [inputs, units]):
        layer = layers.Dense(units,
                             activation=activation,
                             kernel_initializer=masked_initializer,
                             kernel_constraint=lambda x: mask.T * x,
                             name=name)  #,_reuse=tf.AUTO_REUSE)
    return layer.apply(inputs)
Example #8
def _deep_architecture(input_layer, params, mode):
    """
    Return the output operation following the deep part of network.
    :param input_layer: (Tensor) Input
    :param params: (HParams) Hyperparameters (needs to have "hidden_units", "dnn_dropout", "batch_norm", "l2_reg",
    "dnn_activation_fn")
    :param mode: (ModeKeys) Specifies if training, evaluation or prediction. Dropout and BN need it.
    :return: Output Op for the deep part.
    """
    hidden_units = params.hidden_units
    dropout = params.dnn_dropout
    is_bn = params.batch_norm
    l2_reg = params.l2_reg

    net = input_layer

    # regularizer(deep)
    if l2_reg:
        regularizer = tf.contrib.layers.l2_regularizer(l2_reg)
    else:
        regularizer = None

    for layer_id, num_hidden in enumerate(hidden_units):
        with tf.variable_scope('hidden_layer_%d' %
                               layer_id) as hidden_layer_scope:
            net = tf.layers.dense(
                inputs=net,
                units=num_hidden,
                activation=params.dnn_activation_fn,
                kernel_initializer=glorot_normal_initializer(),
                kernel_regularizer=regularizer,
                name=hidden_layer_scope)
            # use Batch Normalization(last layer use no BN, BN after relu)
            if is_bn and layer_id < len(hidden_units) - 1:
                is_training = mode == tf.estimator.ModeKeys.TRAIN
                net = _batch_normalization(input=net,
                                           is_training=is_training,
                                           scope='bn_%d' % layer_id)

            # dropout
            if dropout is not None and mode == tf.estimator.ModeKeys.TRAIN:
                with tf.name_scope('dropout'):
                    net = tf.layers.dropout(net,
                                            rate=dropout,
                                            training=True,
                                            name="dropout")
        _add_layer_summary(net, 'hidden_layer_%d' % layer_id)
    return net
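The `params` object only needs the attributes listed in the docstring; a hedged construction using tf.contrib.training.HParams (values and the `input_layer` tensor are illustrative; batch_norm is left off so the sketch does not depend on the snippet's `_batch_normalization` helper):

params = tf.contrib.training.HParams(
    hidden_units=[256, 128, 64],
    dnn_dropout=0.3,
    batch_norm=False,
    l2_reg=1e-4,
    dnn_activation_fn=tf.nn.relu)
net = _deep_architecture(input_layer, params, tf.estimator.ModeKeys.TRAIN)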
Example #9
def FC_layer(inputs,
             output_dim,
             nonlinearity=tf.nn.relu,
             is_training=None,
             dropout_prob=0.,
             name='FC_layer'):

    if dropout_prob > 0.:
        inputs = tf.layers.dropout(inputs, dropout_prob, training=is_training)

    outputs = tf.layers.dense(
        inputs,
        output_dim,
        nonlinearity,
        kernel_initializer=init_ops.glorot_normal_initializer(),
        bias_initializer=init_ops.zeros_initializer(),
        name=name)

    return outputs
Example #10
def get(identifier, **kwargs):
    if identifier is None or isinstance(identifier, init_ops.Initializer):
        return identifier
    if np.isscalar(identifier) and identifier == 0.: identifier = 'zeros'

    # TODO: ...
    if callable(identifier): return identifier

    elif isinstance(identifier, six.string_types):
        # If identifier is a string
        identifier = identifier.lower()
        if identifier in ['random_uniform']:
            rng = kwargs.get('range', None)
            low, high = checker.get_range(rng)
            return init_ops.RandomUniform(minval=low, maxval=high)
        elif identifier in ['random_norm', 'random_normal']:
            mean = kwargs.get('mean', 0.)
            stddev = kwargs.get('stddev', 1.)
            return init_ops.truncated_normal_initializer(mean=mean,
                                                         stddev=stddev)
        elif identifier in ['glorot_uniform', 'xavier_uniform']:
            return glorot_uniform()
        elif identifier in ['glorot_normal', 'xavier_normal']:
            return init_ops.glorot_normal_initializer()
        elif identifier in ['id', 'identity']:
            return identity()
        else:
            # Find initializer in tensorflow.python.ops.init_ops
            initializer = (init_ops.__dict__.get(identifier, None)
                           or init_ops.__dict__.get(
                               '{}_initializer'.format(identifier), None))
            # If nothing is found
            if initializer is None:
                raise ValueError('Can not resolve "{}"'.format(identifier))
            # Return initializer with default parameters
            return initializer
    elif np.isscalar(identifier):
        # Note string is scalar
        return tf.initializers.constant(value=identifier)
    else:
        raise TypeError('identifier must be an Initializer or a string')
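A few hedged usage examples of the resolver above (the return values noted in the comments follow from the branches in the code):

init_a = get('glorot_normal')                      # init_ops.glorot_normal_initializer()
init_b = get('random_uniform', range=(-0.1, 0.1))  # RandomUniform(minval=-0.1, maxval=0.1)
init_c = get(0.)                                   # scalar 0. is mapped to the 'zeros' branch
init_d = get('orthogonal')                         # resolved by name from init_ops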
Example #11
def masked_dense(inputs: tf.Tensor,
                 units: int,
                 mask: np.ndarray,
                 activation=None,
                 kernel_initializer=None,
                 reuse=None,
                 name=None,
                 *args,
                 **kwargs) -> tf.Tensor:
    """This code has been copied from masked_dense implementation in
    Tensorflow. See TF documentation:
    https://www.tensorflow.org/api_docs/python/tf/contrib/distributions/bijectors/masked_dense

    """

    input_depth = inputs.shape.with_rank_at_least(1)[-1].value
    if input_depth is None:
        raise NotImplementedError(
            "Rightmost dimension must be known prior to graph execution.")

    if kernel_initializer is None:
        kernel_initializer = init_ops.glorot_normal_initializer()

    def masked_initializer(shape, dtype=None, partition_info=None):
        return mask * kernel_initializer(shape, dtype, partition_info)

    with ops.name_scope(name, "masked_dense", [inputs, units]):
        layer = layers.Dense(units,
                             activation=activation,
                             kernel_initializer=masked_initializer,
                             kernel_constraint=lambda x: mask * x,
                             name=name,
                             dtype=inputs.dtype.base_dtype,
                             _scope=name,
                             _reuse=reuse,
                             *args,
                             **kwargs)
        return layer.apply(inputs)
Example #12
def _base_model(features, mode, params):
    """base model is DNN"""
    hidden_units = params.hidden_units
    dropout = params.dnn_dropout

    net = tf.feature_column.input_layer(features=features,
                                        feature_columns=params.feature_columns)
    for layer_id, num_hidden in enumerate(hidden_units):
        with tf.variable_scope('hiddenlayer_%d' %
                               layer_id) as hidden_layer_scope:
            net = tf.layers.dense(
                inputs=net,
                units=num_hidden,
                activation=params.dnn_activation_fn,
                kernel_initializer=glorot_normal_initializer(),
                name=hidden_layer_scope)
            if dropout is not None and mode == tf.estimator.ModeKeys.TRAIN:
                net = tf.layers.dropout(net,
                                        rate=dropout,
                                        training=True,
                                        name='dropout')
    # logits
    logits = tf.layers.dense(net, 1, activation=None)
    return logits
Example #13
def masked_dense(inputs,
                 units,
                 num_blocks=None,
                 exclusive=False,
                 kernel_initializer=None,
                 reuse=None,
                 name=None,
                 *args,
                 **kwargs):
  """A autoregressively masked dense layer. Analogous to `tf.layers.dense`.

  See [1] for detailed explanation.

  [1]: "MADE: Masked Autoencoder for Distribution Estimation."
       Mathieu Germain, Karol Gregor, Iain Murray, Hugo Larochelle. ICML. 2015.
       https://arxiv.org/abs/1502.03509

  Arguments:
    inputs: Tensor input.
    units: Python `int` scalar representing the dimensionality of the output
      space.
    num_blocks: Python `int` scalar representing the number of blocks for the
      MADE masks.
    exclusive: Python `bool` scalar representing whether to zero the diagonal of
      the mask, used for the first layer of a MADE.
    kernel_initializer: Initializer function for the weight matrix.
      If `None` (default), weights are initialized using
      `tf.glorot_normal_initializer`.
    reuse: Python `bool` scalar representing whether to reuse the weights of a
      previous layer by the same name.
    name: Python `str` used to describe ops managed by this function.
    *args: `tf.layers.dense` arguments.
    **kwargs: `tf.layers.dense` keyword arguments.

  Returns:
    Output tensor.

  Raises:
    NotImplementedError: if rightmost dimension of `inputs` is unknown prior to
      graph execution.
  """
  # TODO(b/67594795): Better support of dynamic shape.
  input_depth = inputs.shape.with_rank_at_least(1)[-1].value
  if input_depth is None:
    raise NotImplementedError(
        "Rightmost dimension must be known prior to graph execution.")

  mask = _gen_mask(num_blocks, input_depth, units,
                   MASK_EXCLUSIVE if exclusive else MASK_INCLUSIVE).T

  if kernel_initializer is None:
    kernel_initializer = init_ops.glorot_normal_initializer()

  def masked_initializer(shape, dtype=None, partition_info=None):
    return mask * kernel_initializer(shape, dtype, partition_info)

  with ops.name_scope(name, "masked_dense", [inputs, units, num_blocks]):
    layer = layers.Dense(
        units,
        kernel_initializer=masked_initializer,
        kernel_constraint=lambda x: mask * x,
        name=name,
        dtype=inputs.dtype.base_dtype,
        _scope=name,
        _reuse=reuse,
        *args,
        **kwargs)
    return layer.apply(inputs)
Example #14
def masked_dense(inputs,
                 units,
                 num_blocks=None,
                 exclusive=False,
                 kernel_initializer=None,
                 reuse=None,
                 name=None,
                 *args,
                 **kwargs):
    """A autoregressively masked dense layer. Analogous to `tf.layers.dense`.

  See [Germain et al. (2015)][1] for detailed explanation.

  Arguments:
    inputs: Tensor input.
    units: Python `int` scalar representing the dimensionality of the output
      space.
    num_blocks: Python `int` scalar representing the number of blocks for the
      MADE masks.
    exclusive: Python `bool` scalar representing whether to zero the diagonal of
      the mask, used for the first layer of a MADE.
    kernel_initializer: Initializer function for the weight matrix.
      If `None` (default), weights are initialized using
      `tf.glorot_normal_initializer`.
    reuse: Python `bool` scalar representing whether to reuse the weights of a
      previous layer by the same name.
    name: Python `str` used to describe ops managed by this function.
    *args: `tf.layers.dense` arguments.
    **kwargs: `tf.layers.dense` keyword arguments.

  Returns:
    Output tensor.

  Raises:
    NotImplementedError: if rightmost dimension of `inputs` is unknown prior to
      graph execution.

  #### References

  [1]: Mathieu Germain, Karol Gregor, Iain Murray, and Hugo Larochelle. MADE:
       Masked Autoencoder for Distribution Estimation. In _International
       Conference on Machine Learning_, 2015. https://arxiv.org/abs/1502.03509
  """
    # TODO(b/67594795): Better support of dynamic shape.
    input_depth = inputs.shape.with_rank_at_least(1)[-1].value
    if input_depth is None:
        raise NotImplementedError(
            "Rightmost dimension must be known prior to graph execution.")

    mask = _gen_mask(num_blocks, input_depth, units,
                     MASK_EXCLUSIVE if exclusive else MASK_INCLUSIVE).T

    if kernel_initializer is None:
        kernel_initializer = init_ops.glorot_normal_initializer()

    def masked_initializer(shape, dtype=None, partition_info=None):
        return mask * kernel_initializer(shape, dtype, partition_info)

    with ops.name_scope(name, "masked_dense", [inputs, units, num_blocks]):
        layer = layers.Dense(units,
                             kernel_initializer=masked_initializer,
                             kernel_constraint=lambda x: mask * x,
                             name=name,
                             dtype=inputs.dtype.base_dtype,
                             _scope=name,
                             _reuse=reuse,
                             *args,
                             **kwargs)
        return layer.apply(inputs)
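As the docstring describes, a MADE network stacks these layers with `exclusive=True` only on the first layer so that output i never depends on input i; a hedged two-layer sketch (`x` and its event dimension `D` are placeholders):

# x: [batch, D] inputs; num_blocks is the autoregressive event dimension D.
h = masked_dense(inputs=x, units=2 * D, num_blocks=D, exclusive=True,
                 activation=tf.nn.relu, name='made_hidden')
# The inclusive mask on later layers preserves the autoregressive ordering.
out = masked_dense(inputs=h, units=D, num_blocks=D, exclusive=False,
                   name='made_out')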
Example #15
def inference_bin(input, is_train, stochastic=False, use_bnorm=False):
    with tf.name_scope('128C3-128C3-P2'):
        x = conv2d_bin(stochastic=stochastic,
                       inputs=input,
                       filters=128,
                       kernel_size=3,
                       padding="same",
                       activation=tf.nn.relu,
                       is_train=is_train,
                       use_bias=not use_bnorm,
                       kernel_initializer=init_ops.glorot_normal_initializer())
        if use_bnorm:
            x = tf.layers.batch_normalization(inputs=x, training=is_train)
        x = conv2d_bin(stochastic=stochastic,
                       inputs=x,
                       filters=128,
                       kernel_size=3,
                       padding="same",
                       activation=tf.nn.relu,
                       is_train=is_train,
                       use_bias=not use_bnorm,
                       kernel_initializer=init_ops.glorot_normal_initializer())
        if use_bnorm:
            x = tf.layers.batch_normalization(inputs=x, training=is_train)
        x = tf.layers.max_pooling2d(inputs=x, pool_size=2, strides=2)

    with tf.name_scope('256C3-256C3-P2'):
        x = conv2d_bin(stochastic=stochastic,
                       inputs=x,
                       filters=256,
                       kernel_size=3,
                       padding="same",
                       activation=tf.nn.relu,
                       is_train=is_train,
                       use_bias=not use_bnorm,
                       kernel_initializer=init_ops.glorot_normal_initializer())
        if use_bnorm:
            x = tf.layers.batch_normalization(inputs=x, training=is_train)
        x = conv2d_bin(stochastic=stochastic,
                       inputs=x,
                       filters=256,
                       kernel_size=3,
                       padding="same",
                       activation=tf.nn.relu,
                       is_train=is_train,
                       use_bias=not use_bnorm,
                       kernel_initializer=init_ops.glorot_normal_initializer())
        if use_bnorm:
            x = tf.layers.batch_normalization(inputs=x, training=is_train)
        x = tf.layers.max_pooling2d(inputs=x, pool_size=2, strides=2)

    with tf.name_scope('512C3-512C3-P2'):
        x = conv2d_bin(stochastic=stochastic,
                       inputs=x,
                       filters=512,
                       kernel_size=3,
                       padding="same",
                       activation=tf.nn.relu,
                       is_train=is_train,
                       use_bias=not use_bnorm,
                       kernel_initializer=init_ops.glorot_normal_initializer())
        if use_bnorm:
            x = tf.layers.batch_normalization(inputs=x, training=is_train)
        x = conv2d_bin(stochastic=stochastic,
                       inputs=x,
                       filters=512,
                       kernel_size=3,
                       padding="same",
                       activation=tf.nn.relu,
                       is_train=is_train,
                       use_bias=not use_bnorm,
                       kernel_initializer=init_ops.glorot_normal_initializer())
        if use_bnorm:
            x = tf.layers.batch_normalization(inputs=x, training=is_train)
        x = tf.layers.max_pooling2d(inputs=x, pool_size=2, strides=2)

    with tf.name_scope('1024FC-1024FC-10FC'):
        x = tf.reshape(x, [x.get_shape()[0].value, -1])
        x = dense_bin(inputs=x,
                      units=1024,
                      stochastic=stochastic,
                      is_train=is_train,
                      activation=tf.nn.relu,
                      use_bias=not use_bnorm)
        if use_bnorm:
            x = tf.layers.batch_normalization(inputs=x, training=is_train)
        x = dense_bin(inputs=x,
                      units=1024,
                      stochastic=stochastic,
                      is_train=is_train,
                      activation=tf.nn.relu,
                      use_bias=not use_bnorm)
        if use_bnorm:
            x = tf.layers.batch_normalization(inputs=x, training=is_train)
        x = dense_bin(inputs=x,
                      units=cifar10.NB_CLASSES,
                      stochastic=stochastic,
                      is_train=is_train,
                      use_bias=not use_bnorm)
        if use_bnorm:
            x = tf.layers.batch_normalization(inputs=x, training=is_train)
    return x
Example #16
 def _initializer(shape, dtype, partition_info=None):
     if len(shape) == 2 and (shape[0] == shape[1]):
         return tf.orthogonal_initializer()(shape, dtype, partition_info)
     else:
         return glorot_normal_initializer()(shape, dtype, partition_info)
Example #17
    def ave_model_fn(features, labels, mode):
        """ 
        The variational autoencoder function
            
        The input should be the following
                features    --  a dict containing two keys 'x' and 'y'. The 'x' value should contain the time series
                                of the area around the point of interrest. The format should be 9*9*696. However, the
                                correct format is not enforced.
                                'y' should contain the position of the example. We might want to change it into a 'lat'
                                and a 'lon' parameter.
                labels      --  this is meaningless and should be set to a constant zero. Labels are not needed for
                                a variational autoencoder
                mode        --  This contains the mode the network is used in and should be one of the following values
                                tf.estimator.ModeKeys.TRAIN     - if the network is trained at the moment
                                tf.estimator.ModeKeys.PREDICT   - if the network is used to get information on the examples
                                tf.estimator.ModeKeys.EVAL      - if the network is used to get general information about
                                                                    the performance of the network like a mean error on
                                                                    all data samples
                                if the value equals non of the above, the network will use the EVAL-Key
        """

        # the input layer reshapes the input into the desired form
        input_layer = tf.reshape(features['x'],
                                 [-1, time_size, 1, 1, 1])  # HAS TO BE CHANGED
        input_layer = tf.transpose(input_layer, [0, 2, 3, 1, 4])

        input_slice_center = input_layer

        # All of the tensors used in the encoding part will start with the word 'encoding/'
        with tf.variable_scope('encoding'):

            # WE HAVE TO INCLUDE THE AFFECTED LAYERS DEPENDING ON THEIR SIZE. LET'S START WITH 696

            if time_size == 696:

                # We use a batch norm at the beginning. This sets the mean of the input data in every step to 0 and the
                # biggest variance to one. The idea is to approximate a normal distribution. We should be careful here
                # if the batch size is too low
                batch_norm_1 = tf.layers.batch_normalization(
                    inputs=input_slice_center,
                    training=(mode == tf.estimator.ModeKeys.TRAIN),
                    name='batchnorm1')

                # The first convolution layer. We use strided convolution as it leads to better reproducibility
                # compared to max pooling. << according to 'Unsupervised Representation Learning with Deep Convolutional
                # Generative Adversarial Networks' by Radford et al.
                # We also followed their advice to put batch normalization in between every layer and use ReLU
                # activations
                conv_1 = tf.layers.conv3d(
                    inputs=batch_norm_1,
                    filters=128,
                    kernel_size=(1, 1, 4),
                    strides=(1, 1, 2),
                    padding='valid',
                    activation=tf.nn.relu,
                    kernel_initializer=glorot_normal_initializer(),
                    name='conv1')
                pass  # End of the if 696

            if time_size == 1392:

                batch_norm_0 = tf.layers.batch_normalization(
                    inputs=input_slice_center,
                    training=(mode == tf.estimator.ModeKeys.TRAIN),
                    name='batchnorm0')

                #The first convolution layer. We use strided convolution as it leads to better reproducibility
                #compared to max pooling. << according to 'Unsupervised Representation Learning with Deep Convolutional
                #Generative Adversarial Networks' by Radford et al.
                #We also followed their advice to put batch normalization in between every layer and use ReLU
                #activations
                conv_0 = tf.layers.conv3d(
                    inputs=batch_norm_0,
                    filters=128,
                    kernel_size=(1, 1, 4),
                    strides=(1, 1, 2),
                    padding='valid',
                    activation=tf.nn.relu,
                    kernel_initializer=glorot_normal_initializer(),
                    name='conv0')

                batch_norm_1 = tf.layers.batch_normalization(
                    inputs=conv_0,
                    training=(mode == tf.estimator.ModeKeys.TRAIN),
                    name='batchnorm1')

                conv_1 = tf.layers.conv3d(
                    inputs=batch_norm_1,
                    filters=128,
                    kernel_size=(1, 1, 4),
                    strides=(1, 1, 2),
                    padding='valid',
                    activation=tf.nn.relu,
                    kernel_initializer=glorot_normal_initializer(),
                    name='conv1')

                pass  # End of the if 1392

            # batch normalization makes the activation almost standard normally distributed in every layer
            batch_norm_2 = tf.layers.batch_normalization(
                inputs=conv_1,
                training=(mode == tf.estimator.ModeKeys.TRAIN),
                name='batchnorm2')

            # a second convolutional layer
            conv_2 = tf.layers.conv3d(
                inputs=batch_norm_2,
                filters=64 * 3,
                kernel_size=(1, 1, 4),
                strides=(1, 1, 2),
                padding='valid',
                activation=tf.nn.relu,
                kernel_initializer=glorot_normal_initializer(),
                name='conv2')
            # again a batch normalization layer
            batch_norm_3 = tf.layers.batch_normalization(
                inputs=conv_2,
                training=(mode == tf.estimator.ModeKeys.TRAIN),
                name='batchnorm3')
            # third convolutional layer
            conv_3 = tf.layers.conv3d(
                inputs=batch_norm_3,
                filters=64 * 4,
                kernel_size=(1, 1, 4),
                strides=(1, 1, 2),
                padding='valid',
                activation=tf.nn.relu,
                kernel_initializer=glorot_normal_initializer(),
                name='conv3')

            # batch normalization in between every layer
            batch_norm_4 = tf.layers.batch_normalization(
                inputs=conv_3,
                training=(mode == tf.estimator.ModeKeys.TRAIN),
                name='batchnorm4')

            # fourth convolutional layer
            conv_4 = tf.layers.conv3d(
                inputs=batch_norm_4,
                filters=64 * 4,
                kernel_size=(1, 1, 4),
                strides=(1, 1, 2),
                padding='valid',
                activation=tf.nn.relu,
                kernel_initializer=glorot_normal_initializer(),
                name='conv4')

            # batch normalization in between every layer
            batch_norm_5 = tf.layers.batch_normalization(
                inputs=conv_4,
                training=(mode == tf.estimator.ModeKeys.TRAIN),
                name='batchnorm5')

            # 5. convolutional layer
            conv_5 = tf.layers.conv3d(
                inputs=batch_norm_5,
                filters=64 * 4,
                kernel_size=(1, 1, 4),
                strides=(1, 1, 2),
                padding='valid',
                activation=tf.nn.relu,
                kernel_initializer=glorot_normal_initializer(),
                name='conv5')

            # batch normalization in between every layer
            batch_norm_6 = tf.layers.batch_normalization(
                inputs=conv_5,
                training=(mode == tf.estimator.ModeKeys.TRAIN),
                name='batchnorm6')

            # 6. convolutional layer
            conv_6 = tf.layers.conv3d(
                inputs=batch_norm_6,
                filters=64 * 6,
                kernel_size=(1, 1, 4),
                strides=(1, 1, 2),
                padding='valid',
                activation=tf.nn.relu,
                kernel_initializer=glorot_normal_initializer(),
                name='conv6')

            # batch normalization in between every layer
            batch_norm_7 = tf.layers.batch_normalization(
                inputs=conv_6,
                training=(mode == tf.estimator.ModeKeys.TRAIN),
                name='batchnorm7')

            # the tensor is flattened with reshape instead of flatten to allow for older versions of tf. The drawback
            # is that we need to put the size of the vector in manually
            flatten = tf.contrib.layers.flatten(batch_norm_7)

            # against the advice of Radford et al., we use a fully connected layer here. We calculate the means of the
            # latent distributions. Since the mean of such a distribution could as well be negative, we use no ReLU activation
            # but instead a linear activation
            means = tf.layers.dense(
                inputs=flatten,
                units=encoding_size,
                activation=None,
                kernel_initializer=glorot_normal_initializer(),
                name='fullyconnected_means')

            # for the standard deviations we again use a fully connected layer. Since these should not be negative, we use
            # ReLU activations here.
            deviations = tf.layers.dense(
                inputs=flatten,
                units=encoding_size,
                activation=tf.nn.relu,
                kernel_initializer=glorot_normal_initializer(),
                name='fullyconnected_dev')

        # This completes the encoding part, since the latent distributions are assumed to be normal and therefore are completely
        # defined by the mean and deviation
        # Here we begin the middle part. All tensors from the middle part, where we draw the encoding from the latent distributions,
        # start with 'vae/'
        with tf.variable_scope('vae'):

            # Draw numbers from a standard normal distribution. The numbers have the same shape as means, and as seed we use the
            # seed set above. If no seed is set, the time will be used but the experiment will not be reproducible.
            random_numbers = tf.random_normal(shape=tf.shape(means),
                                              mean=0.0,
                                              stddev=1.0,
                                              seed=random_seed,
                                              name='random_number_generator')

            # Since drawing from a normal distribution with standard deviation σ is the same as drawing from a standard normal
            # distribution and multiplying by σ, we multiply the drawn values by the deviations
            scaled_random = tf.multiply(x=random_numbers,
                                        y=deviations,
                                        name='adjust_variance')

            # Since drawing from a normal distribution with mean μ is the same as drawing from a normal distribution with mean
            # 0 and adding μ we add the means
            encoding = tf.add(x=scaled_random, y=means, name='adjust_means')

            # now the encodings contain values which are normally distributed with means 'means' and deviations 'deviations', but we
            # can still use backpropagation since we do not need to propagate back through the random number generator

        # Here begins the decoding part of the network. All tensors here will begin with 'decoding/'
        with tf.variable_scope('decoding'):

            # We start with a fully connected layer. This gives the network a chance to rearrange the features and makes the architecture
            # of the decoder somewhat independent of the compression factor ϑ, since the dimensionality will be the same after this step
            fc_decoding = tf.layers.dense(
                inputs=encoding,
                units=20 * 64 * 4,
                activation=tf.nn.relu,
                kernel_initializer=glorot_normal_initializer(),
                name='fullyconnected')

            # We reverse the flattening of the tensors and regain a shape suitable for an inverse convolution
            input_layer_decoding = tf.reshape(fc_decoding,
                                              [-1, 1, 1, 20, 64 * 4])

            # Since the centering is a big part of the encoding we do not use a batch normalization at this position in the network

            # for reconstruction we use transposed convolutional layers. These produce an inverse of a convolutional layer.
            deconv_1 = tf.layers.conv3d_transpose(
                inputs=input_layer_decoding,
                filters=64 * 6,
                kernel_size=(1, 1, 4),
                strides=(1, 1, 2),
                padding='valid',
                activation=tf.nn.relu,
                kernel_initializer=glorot_normal_initializer(),
                use_bias=True,
                name='deconv1')

            # From here on again batch normalization in between every layer
            deconv_1_bn = tf.layers.batch_normalization(
                inputs=deconv_1,
                training=(mode == tf.estimator.ModeKeys.TRAIN),
                name='batchnorm2')

            # Since we encoded using three convolutions, we decode using 3 transposed convolutions
            deconv_2 = tf.layers.conv3d_transpose(
                inputs=deconv_1_bn,
                filters=64 * 4,
                kernel_size=(1, 1, 4),
                strides=(1, 1, 2),
                padding='valid',
                activation=tf.nn.relu,
                kernel_initializer=glorot_normal_initializer(),
                use_bias=True,
                name='deconv2')

            # Batch normalization in between every layer
            deconv_2_bn = tf.layers.batch_normalization(
                inputs=deconv_2,
                training=(mode == tf.estimator.ModeKeys.TRAIN),
                name='batchnorm3')

            # the third deconvolution should lead to the exact same size as the input
            deconv_3 = tf.layers.conv3d_transpose(
                inputs=deconv_2_bn,
                filters=64 * 3,
                kernel_size=(1, 1, 4),
                strides=(1, 1, 2),
                padding='valid',
                activation=tf.nn.relu,
                kernel_initializer=glorot_normal_initializer(),
                use_bias=True,
                name='deconv3')

            # Batch normalization in between every layer
            deconv_3_bn = tf.layers.batch_normalization(
                inputs=deconv_3,
                training=(mode == tf.estimator.ModeKeys.TRAIN),
                name='batchnorm4')

            # the 4th deconvolution should lead to the exact same size as the input
            deconv_4 = tf.layers.conv3d_transpose(
                inputs=deconv_3_bn,
                filters=128,
                kernel_size=(1, 1, 4),
                strides=(1, 1, 2),
                padding='valid',
                activation=tf.nn.relu,
                kernel_initializer=glorot_normal_initializer(),
                use_bias=True,
                name='deconv4')

            # Batch normalization in between every layer
            deconv_4_bn = tf.layers.batch_normalization(
                inputs=deconv_4,
                training=(mode == tf.estimator.ModeKeys.TRAIN),
                name='batchnorm5')

            # the 5th deconvolution should lead to the exact same size as the input
            deconv_5 = tf.layers.conv3d_transpose(
                inputs=deconv_4_bn,
                filters=1,
                kernel_size=(1, 1, 4),
                strides=(1, 1, 2),
                padding='valid',
                activation=None,
                kernel_initializer=glorot_normal_initializer(),
                use_bias=True,
                name='deconv5')

            # ADD EXTRA CONVOLUTIONS FOR 1392 SIZE
            if time_size == 1392:

                #Batch normalization in between every layer
                deconv_5_bn = tf.layers.batch_normalization(
                    inputs=deconv_5,
                    training=(mode == tf.estimator.ModeKeys.TRAIN),
                    name='batchnorm6')

                #the 6th deconvolution should lead to the exact same size as the input
                deconv_6 = tf.layers.conv3d_transpose(
                    inputs=deconv_5_bn,
                    filters=1,
                    kernel_size=(1, 1, 4),
                    strides=(1, 1, 2),
                    padding='valid',
                    activation=None,
                    kernel_initializer=glorot_normal_initializer(),
                    use_bias=True,
                    name='deconv6')

                deconv_slice = tf.slice(deconv_6, [0, 0, 0, 0, 0],
                                        [-1, 1, 1, 1392, 1])

                pass  # END OF THE 1392 IF Start the other if

            if time_size == 696:

                deconv_slice = tf.slice(deconv_5, [0, 0, 0, 0, 0],
                                        [-1, 1, 1, 696, 1])

        # The network can report the following quantities:
        #   The reconstructed time series, mostly for comparison to the original
        #   The position given as the 'y' component. This should maybe be substituted for 'lat' and 'lon'
        #   The latent distributions
        predictions = {
            'timeseries': deconv_slice,
            'position': features['y'],
            'encoding_mean': means,
            'encoding_dev': deviations,
            'input': input_slice_center
        }

        # If the mode was set to predict, the above quantities are reported back
        if mode == tf.estimator.ModeKeys.PREDICT:
            return tf.estimator.EstimatorSpec(mode=mode,
                                              predictions=predictions)

        # If the mode was not set to predict, the loss gets calculated.
        # The epsilon is a parameter to avoid numerical problems if the deviation of one variable goes to zero.
        epsilon = 0.0000001

        # The reconstruction error is the mean squared error of the original time series of the middle point and the
        # reconstructed time series of the middle point.

        reconstruction_error = tf.losses.mean_squared_error(
            labels=input_slice_center, predictions=deconv_slice)

        # The latent loss is the KL-Divergence between the latent distributions and a multivariat normal distribution
        latent_loss = tf.reduce_mean(0.5 * tf.reduce_sum(
            tf.square(means) + tf.square(deviations) -
            tf.log(tf.square(deviations) + epsilon) - 1, 1))

        # As the loss we use the sum of both losses above, weighted by a factor of λ. If λ = 1 both losses are weighted equally, if λ = 0 only the reconstruction loss
        # is taken into account, and if λ is big the latent loss is much more important than the reconstruction loss
        lamb = 0.0000001
        loss = reconstruction_error + lamb * latent_loss

        # if the mode is set to train, the network now uses a minimizer to minimize the loss
        if mode == tf.estimator.ModeKeys.TRAIN:
            # As an optimizer we use Adam.
            optimizer = tf.train.AdamOptimizer(learning_rate=l_rate)

            # Since the training error does not depend on the sliding average used in the batch normalizations,
            # they are not automatically updated. Therefore, they have to be updated by hand
            batch_norm_update = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

            # update the weights and the batch norm parameters
            with tf.control_dependencies(batch_norm_update):
                train_op = optimizer.minimize(
                    loss=loss, global_step=tf.train.get_global_step())

            return tf.estimator.EstimatorSpec(mode=mode,
                                              loss=loss,
                                              train_op=train_op)

        # if the mode is neither predict nor train, we assume it to be eval

        # The evaluation metrics are calculated. Here we calculate the mean squared error and the
        # mean absolute error on the whole region, not just on the center
        eval_metric_ops = {
            'squared_error':
            tf.metrics.mean_squared_error(labels=input_slice_center,
                                          predictions=deconv_slice),
            'absolute_error':
            tf.metrics.mean_absolute_error(labels=input_slice_center,
                                           predictions=deconv_slice)
        }

        # the metrics are reported back
        return tf.estimator.EstimatorSpec(mode=mode,
                                          loss=loss,
                                          eval_metric_ops=eval_metric_ops)
Example #18
def make_trainable(net, val):
    net.trainable = val
    for l in net.layers:
        l.trainable = val


shp = X_train.shape[1:]
dropout_rate = 0.25
opt = Adam(lr=1e-4)
dopt = Adam(lr=1e-3)

# Build Generative model ...
nch = 200
g_input = Input(shape=[100])
H = Dense(nch * 14 * 14,
          kernel_initializer=init_ops.glorot_normal_initializer())(g_input)
H = BatchNormalization()(H)
H = Activation('relu')(H)
H = Reshape([14, 14, nch])(H)
H = UpSampling2D(size=(2, 2))(H)
H = Conv2D(nch // 2,
           kernel_size=(3, 3),
           padding='same',
           kernel_initializer=init_ops.glorot_normal_initializer(),
           name='Convolution_1')(H)
H = BatchNormalization()(H)
H = Activation('relu')(H)
H = Conv2D(nch // 4,
           kernel_size=(3, 3),
           padding='same',
           kernel_initializer=init_ops.glorot_normal_initializer(),