def build(self, input_shape):
  self.ema_mean = self.add_weight(
      name='ema_mean',
      shape=(self.num_latent,),
      initializer=init_ops.Zeros(),
      trainable=False,
      dtype=tf.float32,
  )
  self.ema_var = self.add_weight(
      name='ema_var',
      shape=(self.num_latent,),
      initializer=init_ops.Zeros(),
      trainable=False,
      dtype=tf.float32,
  )
def _DenseLayer(x, num_inputs, num_outputs, quantization_range, name):
  """Dense layer with quantized outputs.

  Args:
    x: input to the dense layer
    num_inputs: number of input columns of x
    num_outputs: number of output columns
    quantization_range: the min/max range for quantization
    name: name of the variable scope

  Returns:
    The output of the layer.
  """
  with variable_scope.variable_scope(name):
    kernel = variable_scope.get_variable(
        'kernel',
        shape=[num_inputs, num_outputs],
        dtype=dtypes.float32,
        initializer=init_ops.GlorotUniform())
    bias = variable_scope.get_variable(
        'bias',
        shape=[num_outputs],
        dtype=dtypes.float32,
        initializer=init_ops.Zeros())
    x = math_ops.matmul(x, kernel)
    x = _Quantize(x, quantization_range)
    x = nn.bias_add(x, bias)
    x = _Quantize(x, quantization_range)
  return x
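# Hedged usage sketch (not from the original file): stacking _DenseLayer to
# form a small quantized MLP. It assumes the `_Quantize` helper defined
# elsewhere in the same file and TF1 graph mode; the layer sizes, the range
# value of 6.0, and the scope names are illustrative only.
def _BuildQuantizedMLP(x):
  hidden = _DenseLayer(
      x, num_inputs=16, num_outputs=8, quantization_range=6.0, name='dense1')
  logits = _DenseLayer(
      hidden, num_inputs=8, num_outputs=4, quantization_range=6.0,
      name='dense2')
  return logits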
def test_Zeros(self):
  tensor_shape = (4, 5)
  with self.cached_session():
    self._runner(
        init_ops.Zeros(), tensor_shape, target_mean=0., target_max=0.)
def test_Zeros(self):
  shape = (4, 5)
  with self.cached_session():
    for tensor_shape in [shape, tensor_shape_lib.TensorShape(shape)]:
      self._runner(
          init_ops.Zeros(), tensor_shape, target_mean=0., target_max=0.)
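# For reference, a minimal sketch of what these tests exercise: the TF1-style
# `init_ops.Zeros` initializer returns an all-zero tensor when called with a
# shape (and optional dtype). The shape below is illustrative.
from tensorflow.python.framework import dtypes
from tensorflow.python.ops import init_ops

zeros_init = init_ops.Zeros()
zero_values = zeros_init(shape=(4, 5), dtype=dtypes.float32)  # all zeros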
def __init__(self,
             rank,
             filters,
             kernel_support,
             corr=False,
             strides_down=1,
             strides_up=1,
             padding="valid",
             extra_pad_end=True,
             channel_separable=False,
             data_format="channels_last",
             activation=None,
             use_bias=False,
             kernel_initializer=init_ops.VarianceScaling(),
             bias_initializer=init_ops.Zeros(),
             kernel_regularizer=None,
             bias_regularizer=None,
             kernel_parameterizer=parameterizers.RDFTParameterizer(),
             bias_parameterizer=None,
             **kwargs):
  super(_SignalConv, self).__init__(**kwargs)
  self._rank = int(rank)
  self._filters = int(filters)
  self._kernel_support = utils.normalize_tuple(
      kernel_support, self._rank, "kernel_support")
  self._corr = bool(corr)
  self._strides_down = utils.normalize_tuple(
      strides_down, self._rank, "strides_down")
  self._strides_up = utils.normalize_tuple(
      strides_up, self._rank, "strides_up")
  self._padding = str(padding).lower()
  try:
    self._pad_mode = {
        "valid": None,
        "same_zeros": "CONSTANT",
        "same_reflect": "REFLECT",
    }[self.padding]
  except KeyError:
    raise ValueError("Unsupported padding mode: '{}'".format(padding))
  self._extra_pad_end = bool(extra_pad_end)
  self._channel_separable = bool(channel_separable)
  self._data_format = utils.normalize_data_format(data_format)
  self._activation = activation
  self._use_bias = bool(use_bias)
  self._kernel_initializer = kernel_initializer
  self._bias_initializer = bias_initializer
  self._kernel_regularizer = kernel_regularizer
  self._bias_regularizer = bias_regularizer
  self._kernel_parameterizer = kernel_parameterizer
  self._bias_parameterizer = bias_parameterizer
  self.input_spec = base.InputSpec(ndim=self._rank + 2)
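# Hedged usage sketch: the public rank-specific wrappers in the
# tensorflow_compression package (e.g. SignalConv2D) forward their arguments
# to this _SignalConv constructor with `rank` fixed. The argument values below
# are illustrative only and assume tensorflow_compression is installed.
import tensorflow_compression as tfc

layer = tfc.SignalConv2D(
    filters=32, kernel_support=(5, 5), corr=True, strides_down=2,
    padding="same_zeros", use_bias=True)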
def _recreate_variable(self, proto):
  # TODO(andresp): Can we use the checkpointed value as initializer?
  dummy_value = init_ops.Zeros(dtype=proto.dtype)(shape=proto.shape)
  synchronization, aggregation, trainable = (
      variables.validate_synchronization_aggregation_trainable(
          proto.synchronization, proto.aggregation, proto.trainable,
          # TODO(allenl): We should save variable names.
          name="<variable recreated from SavedModel>"))
  return variables.Variable(
      dummy_value,
      trainable=trainable,
      synchronization=synchronization,
      aggregation=aggregation), setattr
def _recreate_variable(self, proto):
  # TODO(andresp): Can we use the checkpointed value as initializer?
  dummy_value = init_ops.Zeros(dtype=proto.dtype)(shape=proto.shape)
  name = proto.name if proto.name else None
  if name is not None:
    dbg_name = name
  else:
    dbg_name = "<variable loaded from saved model>"
  synchronization, aggregation, trainable = (
      variables.validate_synchronization_aggregation_trainable(
          proto.synchronization, proto.aggregation, proto.trainable,
          name=dbg_name))
  return variables.Variable(
      dummy_value,
      name=name,
      trainable=trainable,
      synchronization=synchronization,
      aggregation=aggregation), setattr
def _recreate_variable(self, proto):
  # TODO(andresp): Can we use the checkpointed value as initializer?
  dummy_value = init_ops.Zeros(dtype=proto.dtype)(shape=proto.shape)
  return variables.Variable(dummy_value, trainable=proto.trainable), setattr
def build(self, input_shape):
  """Builds the layer.

  Creates the variables for the network modeling the densities, creates the
  auxiliary loss estimating the median and tail quantiles of the densities,
  and then uses that to create the probability mass functions and the update
  op that produces the discrete cumulative density functions used by the
  range coder.

  Args:
    input_shape: Shape of the input tensor, used to get the number of
      channels.

  Raises:
    ValueError: if `input_shape` doesn't specify the length of the channel
      dimension.
  """
  input_shape = tensor_shape.TensorShape(input_shape)
  channel_axis = self._channel_axis(input_shape.ndims)
  channels = input_shape[channel_axis].value
  if channels is None:
    raise ValueError("The channel dimension of the inputs must be defined.")
  self.input_spec = engine.InputSpec(
      ndim=input_shape.ndims, axes={channel_axis: channels})
  filters = (1,) + self.filters + (1,)
  scale = self.init_scale ** (1 / (len(self.filters) + 1))

  # Create variables.
  self._matrices = []
  self._biases = []
  self._factors = []
  for i in range(len(self.filters) + 1):
    init = np.log(np.expm1(1 / scale / filters[i + 1]))
    matrix = self.add_variable(
        "matrix_{}".format(i), dtype=self.dtype,
        shape=(channels, filters[i + 1], filters[i]),
        initializer=init_ops.Constant(init))
    matrix = nn.softplus(matrix)
    self._matrices.append(matrix)

    bias = self.add_variable(
        "bias_{}".format(i), dtype=self.dtype,
        shape=(channels, filters[i + 1], 1),
        initializer=init_ops.RandomUniform(-.5, .5))
    self._biases.append(bias)

    if i < len(self.filters):
      factor = self.add_variable(
          "factor_{}".format(i), dtype=self.dtype,
          shape=(channels, filters[i + 1], 1),
          initializer=init_ops.Zeros())
      factor = math_ops.tanh(factor)
      self._factors.append(factor)

  # To figure out what range of the densities to sample, we need to compute
  # the quantiles given by `tail_mass / 2` and `1 - tail_mass / 2`. Since we
  # can't take inverses of the cumulative directly, we make it an
  # optimization problem:
  # `quantiles = argmin(|logit(cumulative) - target|)`
  # where `target` is `logit(tail_mass / 2)` or `logit(1 - tail_mass / 2)`.
  # Taking the logit (inverse of sigmoid) of the cumulative makes the
  # representation of the right target more numerically stable.

  # Numerically stable way of computing logits of `tail_mass / 2`
  # and `1 - tail_mass / 2`.
  target = np.log(2 / self.tail_mass - 1)
  # Compute lower and upper tail quantile as well as median.
  target = constant_op.constant([-target, 0, target], dtype=self.dtype)

  def quantiles_initializer(shape, dtype=None, partition_info=None):
    del partition_info  # unused
    assert tuple(shape[1:]) == (1, 3)
    init = constant_op.constant(
        [[[-self.init_scale, 0, self.init_scale]]], dtype=dtype)
    return array_ops.tile(init, (shape[0], 1, 1))

  quantiles = self.add_variable(
      "quantiles", shape=(channels, 1, 3), dtype=self.dtype,
      initializer=quantiles_initializer)
  logits = self._logits_cumulative(quantiles, stop_gradient=True)
  loss = math_ops.reduce_sum(abs(logits - target))
  self.add_loss(loss, inputs=None)

  # Save medians for `call`, `compress`, and `decompress`.
  self._medians = quantiles[:, :, 1:2]
  if not self.optimize_integer_offset:
    self._medians = math_ops.round(self._medians)

  # Largest distance observed between lower tail quantile and median,
  # or between median and upper tail quantile.
  minima = math_ops.reduce_max(self._medians - quantiles[:, :, 0:1])
  maxima = math_ops.reduce_max(quantiles[:, :, 2:3] - self._medians)
  minmax = math_ops.maximum(minima, maxima)
  minmax = math_ops.ceil(minmax)
  minmax = math_ops.maximum(minmax, 1)

  # Sample the density up to `minmax` around the median.
  samples = math_ops.range(-minmax, minmax + 1, dtype=self.dtype)
  samples += self._medians

  half = constant_op.constant(.5, dtype=self.dtype)
  # We strip the sigmoid from the end here, so we can use the special rule
  # below to only compute differences in the left tail of the sigmoid.
  # This increases numerical stability (see explanation in `call`).
  lower = self._logits_cumulative(samples - half, stop_gradient=True)
  upper = self._logits_cumulative(samples + half, stop_gradient=True)
  # Flip signs if we can move more towards the left tail of the sigmoid.
  sign = -math_ops.sign(math_ops.add_n([lower, upper]))
  pmf = abs(math_ops.sigmoid(sign * upper) - math_ops.sigmoid(sign * lower))
  # Add tail masses to first and last bin of pmf, as we clip values for
  # compression, meaning that out-of-range values get mapped to these bins.
  pmf = array_ops.concat([
      math_ops.add_n([pmf[:, 0, :1], math_ops.sigmoid(lower[:, 0, :1])]),
      pmf[:, 0, 1:-1],
      math_ops.add_n([pmf[:, 0, -1:], math_ops.sigmoid(-upper[:, 0, -1:])]),
  ], axis=-1)
  self._pmf = pmf

  cdf = coder_ops.pmf_to_quantized_cdf(
      pmf, precision=self.range_coder_precision)

  def cdf_getter(*args, **kwargs):
    del args, kwargs  # ignored
    return variable_scope.get_variable(
        "quantized_cdf", dtype=dtypes.int32, initializer=cdf,
        trainable=False, validate_shape=False, collections=())

  # Need to provide a fake shape here since add_variable insists on it.
  self._quantized_cdf = self.add_variable(
      "quantized_cdf", shape=(channels, 1), dtype=dtypes.int32,
      getter=cdf_getter, trainable=False)
  update_op = state_ops.assign(
      self._quantized_cdf, cdf, validate_shape=False)
  self.add_update(update_op, inputs=None)

  super(EntropyBottleneck, self).build(input_shape)
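# A quick numeric check (not part of the original code) of the logit targets
# used in `build` above: for a tail mass t, logit(1 - t / 2) = log(2 / t - 1),
# logit(t / 2) is its negative, and the median target logit(0.5) is zero. The
# tail mass value here is illustrative.
import numpy as np

tail_mass = 1e-9
upper_target = np.log(2 / tail_mass - 1)  # logit(1 - tail_mass / 2) ~= 21.4
lower_target = -upper_target              # logit(tail_mass / 2)
median_target = 0.                        # logit(0.5)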