def apply_dropout(x, level=0.5, noise_dims=None, noise_type='uniform',
                  rescale=True, name="ApplyDropout"):
  """Computes dropout.

  With probability `keep_prob = 1 - level`, outputs the input element scaled
  up by `1 / keep_prob`, otherwise outputs `0`. The scaling is so that the
  expected sum is unchanged.

  Parameters
  ----------
  x : A tensor
      input tensor
  level : float (0.-1.)
      probability of dropping each value in the given tensor
  noise_dims : int or list(int)
      these dimensions will be set to 1 in `noise_shape`, so the dropout
      mask is broadcast along them
  noise_type : 'gaussian' (or 'normal'), 'uniform'
      distribution used for generating the noise
  rescale : bool
      whether to rescale the outputs by dividing by the retain probability

  References
  ----------
  [Dropout: A Simple Way to Prevent Neural Networks from Overfitting.
  Srivastava, Hinton, et al., 2014](http://www.cs.toronto.edu/~rsalakhu/papers/srivastava14a.pdf)

  Note
  ----
  This function only applies noise when training is enabled.
  """
  shape = tf.shape(x)
  retain_prob = 1. - level
  # ====== select the noise function ====== #
  if 'normal' in noise_type or 'gaussian' in noise_type:
    randfunc = lambda shape: tf.random_normal(
        shape=shape, mean=1.0,
        stddev=np.sqrt((1.0 - retain_prob) / retain_prob),
        dtype=x.dtype.base_dtype, seed=randint())
  elif 'uniform' in noise_type:
    randfunc = lambda shape: random_binomial(
        shape=shape, p=retain_prob,
        dtype=x.dtype.base_dtype, seed=randint())
  else:
    raise ValueError('No support for noise_type=' + noise_type)

  # ====== Dropout ====== #
  def training_fn():
    noise_shape = shape if noise_dims is None else \
        _process_noise_dim(shape, noise_dims)
    y = x * randfunc(shape=noise_shape)
    if rescale:
      y /= retain_prob
    return y

  def inference_fn():
    return x

  with tf.variable_scope(name):
    return tf.cond(is_training(), training_fn, inference_fn)
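# --------------------------------------------------------------------------
# Illustration (not library code): a minimal NumPy sketch of the 'uniform'
# (binomial) branch of `apply_dropout` above -- inverted dropout, where the
# surviving elements are rescaled so the expected sum stays unchanged.
import numpy as np

rng = np.random.RandomState(1234)
x_demo = rng.rand(4, 6).astype('float32')
level = 0.5                     # dropout probability
keep_prob = 1.0 - level         # retain probability

# keep each element with probability `keep_prob`, zero it otherwise,
# then rescale the survivors by 1 / keep_prob
mask = rng.binomial(n=1, p=keep_prob, size=x_demo.shape).astype(x_demo.dtype)
y_demo = x_demo * mask / keep_prob

print(abs(x_demo.mean() - y_demo.mean()))   # small on average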
def training_fn():
  # `x`, `shape`, `level`, `noise_dims` and `noise_type` are closed over
  # from the enclosing noise-application function
  noise_shape = (shape if noise_dims is None
                 else _process_noise_dim(shape, noise_dims))
  if 'normal' in noise_type or 'gaussian' in noise_type:
    noise = tf.random_normal(shape=noise_shape, mean=0.0, stddev=level,
                             dtype=x.dtype.base_dtype, seed=randint())
  elif 'uniform' in noise_type:
    noise = tf.random_uniform(shape=noise_shape, minval=-level, maxval=level,
                              dtype=x.dtype.base_dtype, seed=randint())
  else:
    raise ValueError('No support for noise_type=' + noise_type)
  return x + noise
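# --------------------------------------------------------------------------
# Illustration (not library code): what the additive-noise branch computes,
# assuming a (batch, time, feat) input and `noise_dims=1`, so the noise shape
# has size 1 on the time axis and the same noise is broadcast to every step.
import numpy as np

rng = np.random.RandomState(1234)
x_demo = rng.rand(2, 5, 3).astype('float32')   # (batch, time, feat)
level = 0.1
noise_shape = (x_demo.shape[0], 1, x_demo.shape[2])

gaussian = rng.normal(loc=0.0, scale=level, size=noise_shape)
uniform = rng.uniform(low=-level, high=level, size=noise_shape)

y_gauss = x_demo + gaussian    # broadcast over the time axis
y_unif = x_demo + uniform
print(y_gauss.shape, y_unif.shape)             # both (2, 5, 3)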
def __init__(self, num_units,
             W_init=init_ops.glorot_uniform_initializer(seed=randint()),
             b_init=init_ops.constant_initializer(0.),
             rnn_mode='lstm', num_layers=1,
             skip_input=False, is_bidirectional=False,
             return_states=False, dropout=0., **kwargs):
  super(CudnnRNN, self).__init__(**kwargs)
  # ====== default recurrent controls ====== #
  self.num_units = int(num_units)
  self.num_layers = int(num_layers)
  self.rnn_mode = str(rnn_mode)
  self.skip_input = bool(skip_input)
  self.is_bidirectional = bool(is_bidirectional)
  self.return_states = bool(return_states)
  self.dropout = dropout
  self.W_init = W_init
  self.b_init = b_init
  if skip_input:
    wprint("`skip_input` is not supported in Tensorflow.")
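# --------------------------------------------------------------------------
# Rough illustration (not part of the class): how the stored hyper-parameters
# translate into a cuDNN-style LSTM parameter count. cuDNN keeps separate
# input and recurrent bias vectors (8 * units biases per layer and direction);
# `input_dim` is an assumption here, the constructor does not know it yet.
def cudnn_lstm_num_params(input_dim, num_units, num_layers=1,
                          is_bidirectional=False):
  directions = 2 if is_bidirectional else 1
  total = 0
  for layer in range(num_layers):
    in_dim = input_dim if layer == 0 else num_units * directions
    per_direction = (4 * num_units * in_dim        # input weights
                     + 4 * num_units * num_units   # recurrent weights
                     + 8 * num_units)              # input + recurrent biases
    total += per_direction * directions
  return total

print(cudnn_lstm_num_params(input_dim=39, num_units=128,
                            num_layers=2, is_bidirectional=True))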
def __init__(self, n_new_features, n_time_context,
             time_pool='max', backward=False,
             W_init=init_ops.glorot_uniform_initializer(seed=randint()),
             b_init=init_ops.constant_initializer(0),
             activation=K.linear, **kwargs):
  super(TimeDelayedDense, self).__init__(**kwargs)
  if n_new_features is None:
    self.n_new_features = []
  else:
    self.n_new_features = as_tuple(n_new_features, t=int)
  self.n_time_context = int(n_time_context)
  self.n_layers = len(self.n_new_features)
  # ====== initialization ====== #
  self.W_init = W_init
  self.b_init = b_init
  # ====== activation ====== #
  if activation is None:
    activation = K.linear
  if not isinstance(activation, (tuple, list)):
    activation = (activation,)
  activation = [K.linear if i is None else i for i in activation]
  self.activation = as_tuple(activation, N=self.n_layers)
  # ====== time axis manipulation ====== #
  time_pool = str(time_pool).lower()
  assert time_pool in _allow_time_pool, \
      "Only support: %s; but given: '%s'" % (str(_allow_time_pool), str(time_pool))
  self.time_pool = time_pool
  self.backward = bool(backward)
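# --------------------------------------------------------------------------
# Illustration (not library code): the `n_time_context` / `time_pool` idea.
# The sequence is framed into overlapping windows of `n_time_context` steps,
# a dense transform is applied per step (random weights here, for
# illustration only), and the result is pooled; pooling over the context
# axis is an assumption made for this sketch.
import numpy as np

rng = np.random.RandomState(1234)
x_demo = rng.rand(8, 5).astype('float32')     # (time, feat)
n_time_context, n_new_features = 3, 4

# overlapping context windows: (n_frames, n_time_context, feat)
frames = np.stack([x_demo[t:t + n_time_context]
                   for t in range(x_demo.shape[0] - n_time_context + 1)])

W_demo = rng.randn(x_demo.shape[1], n_new_features).astype('float32')
h = frames @ W_demo                 # (n_frames, n_time_context, n_new_features)
y_demo = h.max(axis=1)              # time_pool='max' over the context axis
print(y_demo.shape)                 # (n_frames, n_new_features)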
def __init__(self, input_size, output_size,
             W_init=init_ops.random_uniform_initializer(seed=randint()),
             **kwargs):
  super(Embedding, self).__init__(**kwargs)
  self.input_size = input_size
  self.output_size = output_size
  self.W_init = W_init
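# --------------------------------------------------------------------------
# Illustration (not library code): an embedding is a trainable lookup table
# of shape (input_size, output_size); the op indexes it with integer ids.
import numpy as np

rng = np.random.RandomState(1234)
input_size, output_size = 10, 4                # vocabulary size, embedding dim
W_demo = rng.uniform(-0.05, 0.05, size=(input_size, output_size))

ids = np.array([[1, 3, 3], [7, 0, 2]])         # (batch, time) integer indices
embedded = W_demo[ids]                         # (batch, time, output_size)
print(embedded.shape)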
def __init__(self, n_new_features, n_time_context,
             time_pool='max', backward=False,
             W_init=init_ops.glorot_uniform_initializer(seed=randint()),
             b_init=init_ops.constant_initializer(0),
             activation=K.linear, **kwargs):
  super(TimeDelayedConv, self).__init__(**kwargs)
  self.n_new_features = int(n_new_features)
  self.n_time_context = int(n_time_context)
  self.W_init = W_init
  self.b_init = b_init
  self.activation = activation
  # ====== time axis manipulation ====== #
  time_pool = str(time_pool).lower()
  assert time_pool in _allow_time_pool, \
      "Only support: %s; but given: '%s'" % (str(_allow_time_pool), str(time_pool))
  self.time_pool = time_pool
  self.backward = bool(backward)
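# --------------------------------------------------------------------------
# Illustration (not library code): the convolutional variant can be pictured
# as a 1-D convolution over time with kernel width `n_time_context`, followed
# by pooling over the resulting frames; the random weights and the pooling
# axis are assumptions made for this sketch.
import numpy as np

rng = np.random.RandomState(1234)
x_demo = rng.rand(8, 5).astype('float32')      # (time, feat)
n_time_context, n_new_features = 3, 4

# one filter per output feature, spanning the whole context window
W_demo = rng.randn(n_time_context, x_demo.shape[1], n_new_features)

n_frames = x_demo.shape[0] - n_time_context + 1
h = np.stack([np.tensordot(x_demo[t:t + n_time_context], W_demo,
                           axes=([0, 1], [0, 1]))
              for t in range(n_frames)])       # (n_frames, n_new_features)
y_demo = h.max(axis=0)                         # time_pool='max' over frames
print(h.shape, y_demo.shape)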
def __init__(self, num_filters, filter_size, strides=1, pad='valid',
             W_init=init_ops.glorot_uniform_initializer(seed=randint()),
             b_init=init_ops.constant_initializer(0),
             untie_biases=False, activation=K.linear,
             dilation=1, **kwargs):
  super(Conv, self).__init__(**kwargs)
  self.num_filters = int(num_filters)
  self.filter_size = filter_size
  self.strides = strides
  self.pad = pad
  self.W_init = W_init
  self.b_init = b_init
  self.untie_biases = bool(untie_biases)
  self.dilation = dilation
  self.activation = K.linear if activation is None else activation
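# --------------------------------------------------------------------------
# Illustration (not part of the class): the usual convolution arithmetic for
# one spatial axis, covering `pad`, `strides` and `dilation`.
import math

def conv_output_length(input_length, filter_size, stride=1,
                       pad='valid', dilation=1):
  effective_size = dilation * (filter_size - 1) + 1
  if pad == 'same':
    return math.ceil(input_length / stride)
  # 'valid' padding
  return (input_length - effective_size) // stride + 1

print(conv_output_length(32, filter_size=3, stride=2, pad='valid'))  # 15
print(conv_output_length(32, filter_size=3, stride=2, pad='same'))   # 16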
def __init__(self, num_filters, filter_size, strides=1, pad='valid',
             W_init=init_ops.glorot_uniform_initializer(seed=randint()),
             b_init=init_ops.constant_initializer(0),
             untie_biases=False, activation=K.linear,
             dilation=1, output_shape=None, **kwargs):
  super(TransposeConv, self).__init__(num_filters=num_filters,
                                      filter_size=filter_size,
                                      strides=strides,
                                      pad=pad,
                                      W_init=W_init,
                                      b_init=b_init,
                                      untie_biases=untie_biases,
                                      activation=activation,
                                      dilation=dilation,
                                      **kwargs)
  # explicit output shape
  self._output_shape = output_shape
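# --------------------------------------------------------------------------
# Illustration (not part of the class): the transposed convolution inverts
# the forward arithmetic, but the inversion is ambiguous -- e.g. both input
# lengths 31 and 32 give 15 under 'valid' padding with stride 2 and kernel 3.
# That ambiguity is why an explicit `output_shape` can be passed.
def transpose_conv_output_length(input_length, filter_size, stride=1,
                                 pad='valid'):
  if pad == 'same':
    return input_length * stride
  # 'valid' padding (no dilation)
  return (input_length - 1) * stride + filter_size

print(transpose_conv_output_length(15, filter_size=3, stride=2, pad='valid'))  # 31
print(transpose_conv_output_length(16, filter_size=3, stride=2, pad='same'))   # 32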
def randrectify(x, lower=0.3, upper=0.8, shared_axes='auto', name="RandRectify"):
  """Applies a randomized leaky rectify activation to `x`.

  This function is adapted from Lasagne.
  Original work Copyright (c) 2014-2015 Lasagne contributors.
  All rights reserved.
  LICENSE: https://github.com/Lasagne/Lasagne/blob/master/LICENSE

  The randomized leaky rectifier was first proposed and used in the Kaggle
  NDSB Competition, and later evaluated in [1]_. Compared to the standard
  leaky rectifier :func:`leaky_rectify`, it has a randomly sampled slope for
  negative input during training, and a fixed slope during evaluation.

  Equation for the randomized rectifier linear unit during training:
  :math:`\\varphi(x) = \\max((\\sim U(lower, upper)) \\cdot x, x)`

  During evaluation, the factor is fixed to the arithmetic mean of `lower`
  and `upper`.

  Parameters
  ----------
  x : A tensor
      input tensor
  lower : scalar, tensor or variable
      The lower bound for the randomly chosen slopes.
  upper : scalar, tensor or variable
      The upper bound for the randomly chosen slopes.
  shared_axes : 'auto', 'all', int or tuple of int
      The axes along which the random slopes of the rectifier units are
      going to be shared. If ``'auto'`` (the default), share over all axes
      except for the second - this will share the random slope over the
      minibatch dimension for dense layers, and additionally over all
      spatial dimensions for convolutional layers. If ``'all'``, share over
      all axes, thus using a single random slope.

  References
  ----------
  .. [1] Bing Xu, Naiyan Wang et al. (2015): Empirical Evaluation of
     Rectified Activations in Convolutional Network,
     http://arxiv.org/abs/1505.00853
  """
  ndims = x.shape.ndims
  # ====== check lower and upper ====== #
  if is_variable(lower):
    add_roles(lower, ActivationParameter)
  if is_variable(upper):
    add_roles(upper, ActivationParameter)
  if not is_tensor(lower > upper) and lower > upper:
    raise ValueError("Upper bound for Randomized Rectifier needs "
                     "to be higher than lower bound.")
  # ====== check shared_axes ====== #
  if shared_axes == 'auto':
    shared_axes = (0,) + tuple(range(2, ndims))
  elif shared_axes == 'all':
    shared_axes = tuple(range(ndims))
  elif isinstance(shared_axes, int):
    shared_axes = (shared_axes,)
  else:
    shared_axes = tuple(shared_axes)
  # ====== main logic ====== #
  if not is_training() or upper == lower:
    x = relu(x, alpha=(upper + lower) / 2.0)
  else:  # training mode: sample a random slope per (non-shared) unit
    # use the static shape when it is fully defined, otherwise fall back
    # to the dynamic shape of `x`
    shape = x.shape.as_list()
    if builtins.any(s is None for s in shape):
      shape = tf.unstack(tf.shape(x))
    for ax in shared_axes:
      shape[ax] = 1
    rnd = tf.random_uniform(tf.stack(shape),
                            minval=lower, maxval=upper,
                            dtype=x.dtype.base_dtype,
                            seed=randint())
    x = relu(x, rnd)
  return x
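# --------------------------------------------------------------------------
# Illustration (not library code): the same activation in plain NumPy.
# Training draws one random slope per feature (i.e. sharing over the batch
# axis, as shared_axes='auto' would for a dense layer); evaluation uses the
# fixed mean slope.
import numpy as np

rng = np.random.RandomState(1234)
x_demo = rng.randn(4, 6).astype('float32')     # (batch, features)
lower, upper = 0.3, 0.8

slopes = rng.uniform(lower, upper, size=(1, x_demo.shape[1]))
y_train = np.where(x_demo >= 0, x_demo, slopes * x_demo)   # training
alpha = (lower + upper) / 2.0
y_eval = np.where(x_demo >= 0, x_demo, alpha * x_demo)     # evaluation

print(y_train.shape, y_eval.shape)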