def __init__(self,
             num_channels,
             float_dtype,
             alpha_lo=0.001,
             alpha_hi=1.999,
             alpha_init=None,
             scale_lo=1e-5,
             scale_init=1.0,
             name=None):
    """Validates the configuration and creates the loss's latent variables.

    Each of the `num_channels` channels gets its own shape (alpha) and scale
    parameter. A parameter is only backed by a trainable latent variable when
    its allowed range is non-degenerate; otherwise no variable is created for
    it.

    Args:
      num_channels: Number of "channels" of the adaptive loss. Every channel
        owns one alpha and one scale parameter that can be optimized over.
      float_dtype: Expected numerical precision of the input. The latent
        alpha and scale variables are created with this precision.
      alpha_lo: Scalar lower bound for alpha, must be >= 0. Should probably
        be in (0, 2).
      alpha_hi: Scalar upper bound for alpha, must be >= `alpha_lo`. Should
        probably be in (0, 2).
      alpha_init: Scalar initial value for alpha, strictly inside
        (`alpha_lo`, `alpha_hi`). Not range-checked and not used when
        `alpha_lo == alpha_hi`. Defaults to `(alpha_lo + alpha_hi) / 2`.
      scale_lo: Lowest possible value of the scale parameters. Must be a
        scalar > 0. This can matter more than expected, because the loss is
        unbounded as scale approaches zero.
      scale_init: Initial value of the scale parameters. It is also the
        zero-point of the latent scale representation, so SGD may gravitate
        towards scales near this value.
      name: The name of the module.

    Raises:
      ValueError: If any of the arguments are invalid.
    """
    super(AdaptiveLossFunction, self).__init__(name=name)

    # Scale arguments are validated by the shared module-level helper.
    _check_scale(scale_lo, scale_init)

    # Type checks run before the range checks so that the comparisons below
    # only ever operate on scalars.
    if not np.isscalar(alpha_lo):
        raise ValueError(
            '`alpha_lo` must be a scalar, but is of type {}'.format(
                type(alpha_lo)))
    if not np.isscalar(alpha_hi):
        raise ValueError(
            '`alpha_hi` must be a scalar, but is of type {}'.format(
                type(alpha_hi)))
    if alpha_init is not None and not np.isscalar(alpha_init):
        raise ValueError(
            '`alpha_init` must be None or a scalar, but is of type {}'.format(
                type(alpha_init)))

    # Range checks. They are written as `not (x >= y)` so that a NaN bound is
    # rejected as well.
    if not alpha_lo >= 0:
        raise ValueError('`alpha_lo` must be >= 0, but is {}'.format(alpha_lo))
    if not alpha_hi >= alpha_lo:
        raise ValueError('`alpha_hi` = {} must be >= `alpha_lo` = {}'.format(
            alpha_hi, alpha_lo))

    # A degenerate range (lo == hi) pins alpha to a constant.
    alpha_is_learnable = alpha_lo != alpha_hi

    # An explicit initial value only has to lie strictly inside the range when
    # alpha is actually learnable.
    if (alpha_init is not None and alpha_is_learnable and
            not (alpha_init > alpha_lo and alpha_init < alpha_hi)):
        raise ValueError(
            '`alpha_init` = {} must be in (`alpha_lo`, `alpha_hi`) = ({} {})'
            .format(alpha_init, alpha_lo, alpha_hi))

    if alpha_is_learnable:
        # Alpha isn't constant: back it with a "latent" variable whose initial
        # value is the initial alpha mapped through the inverse affine sigmoid.
        if alpha_init is None:
            initial_alpha = (alpha_lo + alpha_hi) / 2.
        else:
            initial_alpha = alpha_init
        initial_latent = util.inv_affine_sigmoid(
            initial_alpha, lo=alpha_lo, hi=alpha_hi)
        latent_alpha_value = tf.fill(
            (1, num_channels), tf.cast(initial_latent, dtype=float_dtype))
        self._latent_alpha = tf.Variable(latent_alpha_value, name='LatentAlpha')

    if scale_lo != scale_init:
        # Scale isn't constant: back it with a "latent" variable that starts
        # at zero.
        latent_scale_value = tf.zeros((1, num_channels), float_dtype)
        self._latent_scale = tf.Variable(latent_scale_value, name='LatentScale')

    # Keep the validated configuration on the instance.
    self._num_channels = num_channels
    self._float_dtype = tf.dtypes.as_dtype(float_dtype)
    self._alpha_lo = alpha_lo
    self._alpha_hi = alpha_hi
    self._scale_lo = scale_lo
    self._scale_init = scale_init
    self._distribution = distribution.Distribution()
def setUp(self):
    """Creates a fresh `Distribution` and fixes NumPy's global random seed."""
    # Build the object under test before delegating to the base-class setup.
    self._distribution = distribution.Distribution()
    super(DistributionTest, self).setUp()
    # Seed NumPy's global RNG so any draws made from it in a test repeat.
    # NOTE(review): the seed is set after the base-class setUp, presumably so
    # the base class cannot override it -- confirm before reordering.
    np.random.seed(0)