Example No. 1
 def testAffineSoftplusIsCentered(self):
     """Check that affine_softplus(0) == 1."""
     for _ in range(10):
         lo = np.random.uniform(0., 0.1)
         ref = np.random.uniform(0.2, 10.)
         y = util.affine_softplus(np.array(0.), lo=lo, ref=ref)
         self.assertAllClose(y, ref)
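
The test above pins down the defining property of the mapping: an input of 0 is sent exactly to `ref`, whatever `lo` and `ref` are. The following is only a sketch of a function with that property, consistent with the tests in this section but not necessarily the formula used inside `util.affine_softplus`:

import torch

def affine_softplus_sketch(x, lo=0., ref=1.):
    """Maps reals to (lo, infinity) so that x == 0 maps exactly to ref.

    Sketch only: consistent with the tests in this section, but not
    necessarily the implementation of `util.affine_softplus`.
    """
    x = torch.as_tensor(x)
    # Choose the shift so that softplus(0 + shift) == 1, making y(0) == ref.
    shift = torch.log(torch.expm1(torch.tensor(1.)))  # inverse-softplus of 1
    return (ref - lo) * torch.nn.functional.softplus(x + shift) + lo

print(affine_softplus_sketch(0., lo=0.05, ref=3.0))  # ~tensor(3.)
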
Example No. 2
    def scale(self, indexes):
        """Gathers the scale parameters for the requested dimensions."""
        # `latent_scale` has shape [1, num_dims]; select its entries for
        # `indexes` along the dimension axis.
        gather_latent_scale = torch.index_select(self.latent_scale, 1, indexes)

        return util.affine_softplus(gather_latent_scale,
                                    lo=self.scale_lo,
                                    ref=self.scale_init)
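
A small, self-contained illustration of the gather pattern used here; the tensor below is a hypothetical stand-in for `self.latent_scale`, and `torch.index_select` requires integer indices:

import torch

# Stand-in for `self.latent_scale`: one latent value per dimension, shape [1, num_dims].
latent_scale = torch.zeros(1, 8)
indexes = torch.tensor([0, 2, 5])  # must be an integer (Long) tensor
gathered = torch.index_select(latent_scale, 1, indexes)
print(gathered.shape)  # torch.Size([1, 3])
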
Example No. 3
  def __init__(self, num_dims, float_dtype, scale_lo=1e-5, scale_init=1.0):
    """Sets up the adaptive loss for a matrix of inputs.

    Args:
      num_dims: The number of dimensions of the input to come.
      float_dtype: The floating point precision of the inputs to come.
      scale_lo: The lowest possible value for the loss's scale parameters. Must
        be > 0 and a scalar. This value may have more of an effect than you
        think, as the loss is unbounded as scale approaches zero (say, at a
        delta function).
      scale_init: The initial value used for the loss's scale parameters. This
        also defines the zero-point of the latent representation of scales, so
        SGD may cause optimization to gravitate towards producing scales near
        this value.
    """
    super(StudentsTLossFunction, self).__init__()

    if not np.isscalar(scale_lo):
      raise ValueError('`scale_lo` must be a scalar, but is of type {}'.format(
          type(scale_lo)))
    if not np.isscalar(scale_init):
      raise ValueError(
          '`scale_init` must be a scalar, but is of type {}'.format(
              type(scale_init)))
    if not scale_lo > 0:
      raise ValueError('`scale_lo` must be > 0, but is {}'.format(scale_lo))
    if not scale_init >= scale_lo:
      raise ValueError('`scale_init` = {} must be >= `scale_lo` = {}'.format(
          scale_init, scale_lo))

    self.num_dims = num_dims
    if float_dtype == np.float32:
      float_dtype = torch.float32
    if float_dtype == np.float64:
      float_dtype = torch.float64
    self.float_dtype = float_dtype

    self.log_df = torch.nn.Parameter(
        torch.zeros((1, self.num_dims)).type(self.float_dtype),
        requires_grad=True)
    self.register_parameter('log_df', self.log_df)

    if scale_lo == scale_init:
      # If the difference between the minimum and initial scale is zero, then
      # we just fix `scale` to be a constant.
      self.latent_scale = None
      self.fixed_scale = torch.as_tensor(scale_init).type(
          self.float_dtype)[np.newaxis, np.newaxis].repeat(1, self.num_dims)
      self.scale = lambda: self.fixed_scale
    else:
      # Otherwise we construct a "latent" scale variable and define `scale`
      # as an affine function of a softplus on that latent variable.
      self.latent_scale = torch.nn.Parameter(
          torch.zeros((1, self.num_dims)).type(self.float_dtype),
          requires_grad=True)
      self.register_parameter('latent_scale', self.latent_scale)
      self.scale = lambda: util.affine_softplus(
          self.latent_scale, lo=scale_lo, ref=scale_init)
    self.df = lambda: torch.exp(self.log_df)
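
A hedged usage sketch for this constructor, assuming the class definition above (together with its `util` dependency) is in scope; the method that actually evaluates the loss on residuals is not part of this snippet, so only the parameter accessors are exercised:

import numpy as np
import torch

# Assumes StudentsTLossFunction (as defined above) and its dependencies are importable.
loss_fn = StudentsTLossFunction(num_dims=4, float_dtype=np.float32)

print(loss_fn.df().shape)     # torch.Size([1, 4]); df == exp(log_df) == 1 at init
print(loss_fn.scale().shape)  # torch.Size([1, 4]); scale == scale_init == 1.0 at init

# `log_df` and `latent_scale` are registered parameters, so they can be
# optimized jointly with a model's weights.
optimizer = torch.optim.Adam(loss_fn.parameters(), lr=1e-3)
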
Example No. 4
 def testAffineSoftplusSpansRange(self):
     """Check that affine_softplus()'s output is in [lo, infinity]."""
     x = np.finfo(np.float32).max * np.array([-1, 1], dtype=np.float32)
     for _ in range(10):
         lo = np.random.uniform(0., 0.1)
         ref = np.random.uniform(0.2, 10.)
         y = util.affine_softplus(x, lo=lo, ref=ref)
         self.assertAllClose(y[0], lo)
         self.assertAllGreater(y[1], 1e10)
Example No. 5
 def testAffineSoftplusRoundTrip(self):
     """Check that x = inv_affine_softplus(affine_softplus(x)) in general."""
     x = np.float32(np.linspace(-10., 10., 1000))
     for _ in range(10):
         lo = np.random.uniform(0., 0.1)
         ref = np.random.uniform(0.2, 10.)
         y = util.affine_softplus(x, lo=lo, ref=ref)
         x_recon = util.inv_affine_softplus(y, lo=lo, ref=ref)
         self.assertAllClose(x, x_recon, atol=1e-5, rtol=1e-3)
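
The round-trip test implies that `inv_affine_softplus` undoes the affine map and then the softplus. The following sketch is consistent with the forward sketch shown after Example No. 1, but again not necessarily the exact implementation in `util`:

import torch

def inv_affine_softplus_sketch(y, lo=0., ref=1.):
    """Inverse of the affine_softplus sketch: maps (lo, infinity) back to the reals."""
    y = torch.as_tensor(y)
    shift = torch.log(torch.expm1(torch.tensor(1.)))  # inverse-softplus of 1
    # Undo the affine part, then the softplus, then the shift.
    return torch.log(torch.expm1((y - lo) / (ref - lo))) - shift

# Round-tripping through the two sketches approximately recovers the input.
x = torch.linspace(-10., 10., 5)
shift = torch.log(torch.expm1(torch.tensor(1.)))
y = (1.0 - 0.05) * torch.nn.functional.softplus(x + shift) + 0.05  # lo=0.05, ref=1.0
print(inv_affine_softplus_sketch(y, lo=0.05, ref=1.0))  # ~= x
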
Example No. 6
    def __init__(self,
                 num_dims,
                 float_dtype,
                 device,
                 alpha_lo=0.001,
                 alpha_hi=1.999,
                 alpha_init=None,
                 scale_lo=1e-5,
                 scale_init=1.0):
        """Sets up the loss function.

    Args:
      num_dims: The number of dimensions of the input to come.
      float_dtype: The floating point precision of the inputs to come.
      device: The device to run on (cpu, cuda, etc).
      alpha_lo: The lowest possible value for the loss's alpha parameters. Must
        be >= 0 and a scalar. Should probably be in (0, 2).
      alpha_hi: The highest possible value for the loss's alpha parameters. Must
        be >= `alpha_lo` and a scalar. Should probably be in (0, 2).
      alpha_init: The value that the loss's alpha parameters will be initialized
        to. Must be in (`alpha_lo`, `alpha_hi`), unless `alpha_lo` == `alpha_hi`,
        in which case this is ignored. Defaults to (`alpha_lo` + `alpha_hi`) / 2.
      scale_lo: The lowest possible value for the loss's scale parameters. Must
        be > 0 and a scalar. This value may have more of an effect than you
        think, as the loss is unbounded as scale approaches zero (say, at a
        delta function).
      scale_init: The initial value used for the loss's scale parameters. This
        also defines the zero-point of the latent representation of scales, so
        SGD may cause optimization to gravitate towards producing scales near
        this value.
    """
        super(AdaptiveLossFunction, self).__init__()

        if not np.isscalar(alpha_lo):
            raise ValueError(
                '`alpha_lo` must be a scalar, but is of type {}'.format(
                    type(alpha_lo)))
        if not np.isscalar(alpha_hi):
            raise ValueError(
                '`alpha_hi` must be a scalar, but is of type {}'.format(
                    type(alpha_hi)))
        if alpha_init is not None and not np.isscalar(alpha_init):
            raise ValueError(
                '`alpha_init` must be None or a scalar, but is of type {}'.
                format(type(alpha_init)))
        if not alpha_lo >= 0:
            raise ValueError(
                '`alpha_lo` must be >= 0, but is {}'.format(alpha_lo))
        if not alpha_hi >= alpha_lo:
            raise ValueError(
                '`alpha_hi` = {} must be >= `alpha_lo` = {}'.format(
                    alpha_hi, alpha_lo))
        if alpha_init is not None and alpha_lo != alpha_hi:
            if not (alpha_init > alpha_lo and alpha_init < alpha_hi):
                raise ValueError(
                    '`alpha_init` = {} must be in (`alpha_lo`, `alpha_hi`) = ({}, {})'
                    .format(alpha_init, alpha_lo, alpha_hi))
        if not np.isscalar(scale_lo):
            raise ValueError(
                '`scale_lo` must be a scalar, but is of type {}'.format(
                    type(scale_lo)))
        if not np.isscalar(scale_init):
            raise ValueError(
                '`scale_init` must be a scalar, but is of type {}'.format(
                    type(scale_init)))
        if not scale_lo > 0:
            raise ValueError(
                '`scale_lo` must be > 0, but is {}'.format(scale_lo))
        if not scale_init >= scale_lo:
            raise ValueError(
                '`scale_init` = {} must be >= `scale_lo` = {}'.format(
                    scale_init, scale_lo))

        self.num_dims = num_dims
        if float_dtype == np.float32:
            float_dtype = torch.float32
        if float_dtype == np.float64:
            float_dtype = torch.float64
        self.float_dtype = float_dtype
        self.device = device
        if isinstance(device, int) or\
           (isinstance(device, str) and 'cuda' in device):
            torch.cuda.set_device(self.device)

        self.distribution = distribution.Distribution()

        if alpha_lo == alpha_hi:
            # If the range of alphas is a single item, then we just fix `alpha` to be
            # a constant.
            self.fixed_alpha = torch.tensor(
                alpha_lo, dtype=self.float_dtype,
                device=self.device)[np.newaxis,
                                    np.newaxis].repeat(1, self.num_dims)
            self.alpha = lambda: self.fixed_alpha
        else:
            # Otherwise we construct a "latent" alpha variable and define `alpha`
            # as an affine function of a sigmoid on that latent variable, initialized
            # such that `alpha` starts off as `alpha_init`.
            if alpha_init is None:
                alpha_init = (alpha_lo + alpha_hi) / 2.
            latent_alpha_init = util.inv_affine_sigmoid(alpha_init,
                                                        lo=alpha_lo,
                                                        hi=alpha_hi)
            self.register_parameter(
                'latent_alpha',
                torch.nn.Parameter(latent_alpha_init.clone().detach().to(
                    dtype=self.float_dtype,
                    device=self.device)[np.newaxis,
                                        np.newaxis].repeat(1, self.num_dims),
                                   requires_grad=True))
            self.alpha = lambda: util.affine_sigmoid(
                self.latent_alpha, lo=alpha_lo, hi=alpha_hi)

        if scale_lo == scale_init:
            # If the difference between the minimum and initial scale is zero, then
            # we just fix `scale` to be a constant.
            self.fixed_scale = torch.tensor(
                scale_init, dtype=self.float_dtype,
                device=self.device)[np.newaxis,
                                    np.newaxis].repeat(1, self.num_dims)
            self.scale = lambda: self.fixed_scale
        else:
            # Otherwise we construct a "latent" scale variable and define `scale`
            # as an affine function of a softplus on that latent variable.
            self.register_parameter(
                'latent_scale',
                torch.nn.Parameter(torch.zeros(
                    (1, self.num_dims)).to(dtype=self.float_dtype,
                                           device=self.device),
                                   requires_grad=True))
            self.scale = lambda: util.affine_softplus(
                self.latent_scale, lo=scale_lo, ref=scale_init)
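
A usage sketch for this constructor, assuming the class definition above and its `util`/`distribution` dependencies are in scope; how the adaptive loss is evaluated on residuals is outside this snippet:

import numpy as np
import torch

# Assumes AdaptiveLossFunction (as defined above) and its dependencies are importable.
adaptive = AdaptiveLossFunction(num_dims=3, float_dtype=np.float32, device='cpu')

print(adaptive.alpha())  # shape [1, 3]; starts at (alpha_lo + alpha_hi) / 2 == 1.0
print(adaptive.scale())  # shape [1, 3]; starts at scale_init == 1.0

# `latent_alpha` and `latent_scale` are registered parameters, so they can be
# optimized jointly with a model's weights.
optimizer = torch.optim.Adam(adaptive.parameters(), lr=1e-3)
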
Example No. 7
 def testDefaultAffineSoftplusRoundTrip(self):
     """Check that x = inv_affine_softplus(affine_softplus(x)) by default."""
     x = np.float32(np.linspace(-10., 10., 1000))
     y = util.affine_softplus(x)
     x_recon = util.inv_affine_softplus(y)
     self.assertAllClose(x, x_recon, atol=1e-5, rtol=1e-3)