def __init__(
    self,
    in_units: int,
    hidden_units: int,
    out_units: int,
    num_hidden_layers: int = 2,
    activation: str = "lipswish",
    jacobian_method: str = "bf",
    num_power_iter: int = 1,
    coeff: float = 0.9,
    flatten: bool = False,
):
    super().__init__()
    self._in_units = in_units
    self._hidden_units = hidden_units
    self._out_units = out_units
    self._num_hidden_layers = num_hidden_layers
    self._activation = activation
    self._jacobian_method = jacobian_method
    self._num_power_iter = num_power_iter
    self._coeff = coeff
    # orthogonal init scaled by coeff starts the weights at spectral norm coeff
    self._weight_initializer = mx.init.Orthogonal(scale=self._coeff)
    self._bias_initializer = "zeros"
    self._flatten = flatten
    self._cached_inputs: List[Tensor] = []

    in_dim = self._in_units
    with self.name_scope():
        self._layers: List[mx.gluon.HybridBlock] = []
        # hidden spectrally normalized linear layers, each followed by an activation
        for i in range(self._num_hidden_layers):
            lin = SNDense(
                self._hidden_units,
                in_units=in_dim,
                activation=None,
                num_power_iter=self._num_power_iter,
                weight_initializer=self._weight_initializer,
                bias_initializer=self._bias_initializer,
                coeff=self._coeff,
                flatten=self._flatten,
            )
            act = get_activation(
                self._activation, prefix=self._activation + str(i)
            )
            in_dim = self._hidden_units
            self.register_child(lin)
            self.register_child(act)
            self._layers += [lin, act]
        # final linear layer maps to out_units, with no trailing activation
        last_lin = SNDense(
            self._out_units,
            in_units=in_dim,
            activation=None,
            num_power_iter=self._num_power_iter,
            weight_initializer=self._weight_initializer,
            bias_initializer=self._bias_initializer,
            coeff=self._coeff,
            flatten=self._flatten,
        )
        self.register_child(last_lin)
        self._layers += [last_lin]
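Since every SNDense layer is constrained to spectral norm at most coeff and LipSwish is 1-Lipschitz by construction, the composed network has Lipschitz constant at most coeff ** (num_hidden_layers + 1) < 1; this contraction property is what makes a residual block of the form x + g(x) invertible by fixed-point iteration (Behrmann et al., i-ResNet). A minimal sketch of the forward pass implied by self._layers follows; the method name follows the Gluon hybrid_forward convention, and the input caching is an assumption suggested by self._cached_inputs, not the actual implementation:

def hybrid_forward(self, F, x):
    # Hypothetical sketch: apply the SNDense/activation chain built in __init__.
    # Caching the input (assumed, based on _cached_inputs) would allow a later
    # inversion pass to recover x by fixed-point iteration.
    self._cached_inputs.append(x)
    for layer in self._layers:
        x = layer(x)
    return x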
def test_activation_deriv(activation, kwargs):
    def get_deriv_autograd(input, act):
        input.attach_grad()
        with autograd.record():
            output = act(input)
        # create_graph=True keeps the returned gradient differentiable
        return autograd.grad(output, [input], create_graph=True)[0]

    input = mx.nd.random.randn(500, 20)
    act = get_activation(activation, **kwargs)
    act.initialize()
    correct_deriv = get_deriv_autograd(input, act)
    act_deriv = get_activation_deriv(act)
    output_deriv = act_deriv(mx.ndarray, input)
    # compare the closed-form derivative against autograd, column by column
    assert all(
        np.allclose(out, corr, atol=5.0e-8)
        for out, corr in zip(
            output_deriv.T.asnumpy(), correct_deriv.T.asnumpy()
        )
    )
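The same autograd-vs-closed-form pattern can be illustrated in a self-contained way with tanh, whose derivative 1 - tanh(x)**2 is known exactly; this sketch uses only MXNet and NumPy, not the library's activation helpers:

import mxnet as mx
import numpy as np
from mxnet import autograd

# Check MXNet's autograd derivative of tanh against the closed form.
x = mx.nd.random.randn(500, 20)
x.attach_grad()
with autograd.record():
    y = x.tanh()
# create_graph=True returns a differentiable gradient, as in the test above
grad = autograd.grad(y, [x], create_graph=True)[0]
analytic = 1 - x.tanh() ** 2
assert np.allclose(grad.asnumpy(), analytic.asnumpy(), atol=5.0e-8)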
def __init__(
    self,
    units: int,
    in_units: int,
    coeff: float = 0.9,
    activation: Optional[str] = None,
    use_bias: bool = True,
    flatten: bool = True,
    weight_initializer: init.Initializer = init.Orthogonal(scale=0.9),
    bias_initializer="zeros",
    dtype="float32",
    num_power_iter: int = 1,
    ctx: Optional[mx.Context] = None,
    **kwargs
):
    super().__init__(**kwargs)
    self._coeff = coeff
    self._flatten = flatten
    self._ctx = ctx if ctx is not None else get_mxnet_context()
    self._num_power_iter = num_power_iter
    with self.name_scope():
        self._units = units
        self._in_units = in_units
        self._weight = self.params.get(
            "weight",
            shape=(units, in_units),
            init=weight_initializer,
            dtype=dtype,
        )
        # running estimate of the top left singular vector, used by power iteration
        self._u = self.params.get(
            "u", init=mx.init.Normal(), shape=(1, units)
        )
        if use_bias:
            self._bias = self.params.get(
                "bias", shape=(units,), init=bias_initializer, dtype=dtype
            )
        else:
            self._bias = None
        if activation is not None:
            self._act = get_activation(activation, prefix=activation + "_")
        else:
            self._act = None
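The num_power_iter argument and the persistent u parameter point to power iteration for estimating the spectral norm of weight, which is then clipped so the layer's Lipschitz constant stays below coeff (the standard Miyato-style spectral normalization used by i-ResNet). A minimal NumPy sketch of that scheme under those assumptions; the function and variable names here are illustrative, not the actual layer's API:

import numpy as np

def spectral_normalize(weight, u, coeff=0.9, num_power_iter=1, eps=1e-12):
    # Power iteration: u and v converge to the top left/right singular vectors.
    for _ in range(num_power_iter):
        v = weight.T @ u
        v /= np.linalg.norm(v) + eps
        u = weight @ v
        u /= np.linalg.norm(u) + eps
    sigma = u @ weight @ v  # estimate of the largest singular value
    # Rescale only when the estimated norm exceeds the target coefficient.
    return weight / max(1.0, sigma / coeff), u

W = np.random.randn(32, 16)
u = np.random.randn(32)
W_sn, u = spectral_normalize(W, u, num_power_iter=100)
assert np.linalg.norm(W_sn, ord=2) <= 0.9 + 1e-2

In the Gluon layer, storing u as a parameter lets the estimate persist across forward passes, so a single power-iteration step per call is typically enough once training has warmed up.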