def __init__(self, num_features: int, axis: int = -1, event_ndims: int = 1):
    """
    Construct a new :class:`FeatureShufflingFlow`.

    Args:
        num_features: The size of the feature axis.
        axis: The feature axis along which to apply the transformation.
        event_ndims: Number of dimensions to be considered as the event
            dimensions.  `x.ndims - event_ndims == log_det.ndims`.
    """
    super().__init__(axis=int(axis), event_ndims=event_ndims,
                     explicitly_invertible=True)
    self.num_features = num_features

    # initialize the permutation variable, and the inverse permutation
    permutation = torch.randperm(num_features, dtype=torch.int64)
    inv_permutation = torch.argsort(permutation)

    # register the permutation as a layer parameter, so that it can be
    # persisted by the Model checkpoint
    add_parameter(self, 'permutation', permutation, requires_grad=False)
    add_parameter(self, 'inv_permutation', inv_permutation,
                  requires_grad=False)

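# --- Illustrative sketch (not part of the original class) -------------------
# A hedged, standalone example of how the `permutation` / `inv_permutation`
# pair registered above is typically used by a feature-shuffling flow:
# features are gathered along the feature axis in the forward pass, and the
# inverse permutation undoes it exactly.  The shuffle is volume-preserving,
# so its log-determinant contribution is zero.  `_demo_feature_shuffle` is a
# hypothetical helper that only demonstrates the idea with plain torch calls.
def _demo_feature_shuffle():
    import torch
    num_features = 5
    permutation = torch.randperm(num_features, dtype=torch.int64)
    inv_permutation = torch.argsort(permutation)
    x = torch.randn(2, num_features)
    # forward: permute the feature axis (dim 1 here); log|det| == 0
    y = torch.index_select(x, dim=1, index=permutation)
    # inverse: apply the inverse permutation to recover x exactly
    x_back = torch.index_select(y, dim=1, index=inv_permutation)
    assert torch.equal(x_back, x)
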
def __init__(self,
             shape: List[int],
             initializer: TensorInitArgType,
             device: Optional[str] = None):
    super().__init__(shape)
    device = device or current_device()
    add_parameter(self, 'value',
                  variable(shape, initializer=initializer, device=device))

def __init__(self,
             shape: List[int],
             initializer: TensorInitArgType,
             norm_axis: int = 1,
             device: Optional[str] = None,
             epsilon: float = EPSILON):
    super().__init__(shape)
    self.norm_axis = norm_axis
    device = device or current_device()
    self.epsilon = epsilon

    # decompose the freshly initialized weight via `weight_norm_decompose`
    # and keep only the `v` component as the trainable parameter
    weight = variable(shape, initializer=initializer, device=device)
    with torch.no_grad():
        v, _ = weight_norm_decompose(weight, norm_axis, epsilon)
    add_parameter(self, 'v', v)

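# --- Illustrative sketch (assumption, not the library's implementation) -----
# `weight_norm_decompose` is not shown in this file.  A plausible contract,
# following the weight-normalization reparameterization of Salimans & Kingma
# (2016), is that it splits a weight tensor into a direction `v` and a norm
# such that `weight == v * norm`.  The helper below is a hypothetical
# implementation of that contract which reduces over every axis except
# `norm_axis`; the real function may pick its axes differently.
def _demo_weight_norm_decompose(weight, norm_axis=1, epsilon=1e-6):
    import torch
    reduce_axes = [a for a in range(weight.dim()) if a != norm_axis]
    norm = torch.sqrt(
        torch.clamp(torch.sum(weight ** 2, dim=reduce_axes, keepdim=True),
                    min=epsilon))
    v = weight / norm          # unit-norm direction
    return v, norm             # the weight can be recovered as v * norm
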
def __init__(self, seed_matrix, dtype=torch.float32, epsilon=1e-5):
    # orthogonalize the seed matrix, then take its LU decomposition
    initial_matrix = la.qr(seed_matrix)[0]
    super().__init__(initial_matrix.shape[0])
    matrix_shape = list(initial_matrix.shape)

    initial_P, initial_L, initial_U = la.lu(initial_matrix)
    initial_s = np.diag(initial_U)
    initial_sign = np.sign(initial_s)
    initial_log_s = np.log(np.maximum(np.abs(initial_s), epsilon))
    # upper-triangular part with the diagonal zeroed out
    initial_U = np.triu(initial_U, k=1)

    add_buffer(self, 'P', as_tensor(initial_P, dtype=dtype, force_copy=True))
    add_parameter(self, 'pre_L',
                  as_tensor(initial_L, dtype=dtype, force_copy=True))
    add_buffer(self, 'L_mask',
               as_tensor(np.tril(np.ones(matrix_shape), k=-1),
                         dtype=dtype, force_copy=True))
    add_parameter(self, 'pre_U',
                  as_tensor(initial_U, dtype=dtype, force_copy=True))
    add_buffer(self, 'U_mask',
               as_tensor(np.triu(np.ones(matrix_shape), k=1),
                         dtype=dtype, force_copy=True))
    add_buffer(self, 'sign',
               as_tensor(initial_sign, dtype=dtype, force_copy=True))
    add_parameter(self, 'log_s',
                  as_tensor(initial_log_s, dtype=dtype, force_copy=True))

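# --- Illustrative sketch (assumption, not the original method) --------------
# Given the buffers and parameters registered above, the invertible matrix is
# plausibly reconstructed the way Glow (Kingma & Dhariwal, 2018) does it:
#     M = P @ (L_mask * pre_L + I) @ (U_mask * pre_U + diag(sign * exp(log_s)))
# so that log|det M| = sum(log_s) can be read off directly.  `_demo_lu_matrix`
# is a hypothetical helper that only illustrates this reconstruction.
def _demo_lu_matrix(P, pre_L, L_mask, pre_U, U_mask, sign, log_s):
    import torch
    eye = torch.eye(P.shape[0], dtype=P.dtype)
    L = pre_L * L_mask + eye                          # unit lower-triangular
    U = pre_U * U_mask + torch.diag(sign * torch.exp(log_s))
    matrix = P @ L @ U
    log_det = torch.sum(log_s)                        # log|det(matrix)|
    return matrix, log_det
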
def __init__(
        self,
        num_features,
        event_ndims: int = 1,
        axis: int = -1,
        w_init: TensorInitArgType = init.normal,
        b_init: TensorInitArgType = init.zeros,
        u_init: TensorInitArgType = init.normal,
):
    super().__init__(axis=axis, event_ndims=event_ndims,
                     explicitly_invertible=False)

    add_parameter(
        self,
        'w',
        value=variable([1, num_features], initializer=w_init),
    )
    add_parameter(self, 'b', value=variable([1], initializer=b_init))
    add_parameter(self, 'u',
                  value=variable([1, num_features], initializer=u_init))

    self.num_features = num_features
    # `u_hat` is left uninitialized here; it is derived from the current
    # `u` and `w` before the transformation is applied
    self.u_hat = None

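# --- Illustrative sketch (assumption, not the original forward pass) --------
# The parameter shapes above match the planar flow of Rezende & Mohamed
# (2015): y = x + u_hat * tanh(x @ w.T + b), where `u_hat` is derived from
# `u` and `w` so that w . u_hat >= -1, which keeps the transformation
# invertible.  Both helpers below are hypothetical and only sketch that
# construction.
def _demo_planar_u_hat(w, u):
    import torch
    wu = torch.sum(w * u)                            # scalar w . u
    m_wu = -1. + torch.nn.functional.softplus(wu)    # m(w . u) >= -1
    return u + (m_wu - wu) * w / torch.sum(w ** 2)

def _demo_planar_transform(x, w, b, u_hat):
    import torch
    # x: [batch, num_features]; w, u_hat: [1, num_features]; b: [1]
    return x + u_hat * torch.tanh(x @ w.t() + b)
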
def __init__(self,
             num_features: int,
             axis: int = -1,
             event_ndims: int = 1,
             scale: Union[str, ActNormScaleType] = 'exp',
             initialized: bool = False,
             epsilon: float = 1e-5,
             dtype: str = 'float32'):
    """
    Construct a new :class:`ActNorm` instance.

    Args:
        num_features: The size of the feature axis.
        scale: One of {"exp", "linear"}.
            If "exp", ``y = (x + bias) * exp(log_scale)``.
            If "linear", ``y = (x + bias) * scale``.
            Defaults to "exp".
        axis: The axis to apply ActNorm.
            Dimensions not in `axis` will be averaged out when computing
            the mean of activations.  Defaults to `-1`, the last dimension.
            All items of `axis` should be covered by `event_ndims`.
        event_ndims: Number of value dimensions in both `x` and `y`.
            `x.ndims - event_ndims == log_det.ndims` and
            `y.ndims - event_ndims == log_det.ndims`.
        initialized: Whether or not the variables have been initialized.
            Defaults to :obj:`False`, where the first input `x` in the
            forward pass will be used to initialize the variables.
        epsilon: The infinitesimal constant to avoid dividing by zero or
            taking the logarithm of zero.
        dtype: Dtype of the parameters.
    """
    # validate the arguments
    scale_type = ActNormScaleType(scale)
    epsilon = float(epsilon)

    if scale_type == ActNormScaleType.EXP:
        scale = ExpScale()
        pre_scale_init = partial(init.fill, fill_value=0.)
    elif scale_type == ActNormScaleType.LINEAR:
        scale = LinearScale(epsilon=epsilon)
        pre_scale_init = partial(init.fill, fill_value=1.)
    else:  # pragma: no cover
        raise ValueError(f'Unsupported `scale_type`: {scale_type}')

    # construct the layer
    super().__init__(axis=axis, event_ndims=event_ndims,
                     explicitly_invertible=True)

    self.num_features = num_features
    self.scale = scale
    self.scale_type = scale_type.value
    self.epsilon = epsilon
    self.initialized = initialized

    add_parameter(
        self,
        'pre_scale',
        variable([num_features], dtype=dtype, initializer=pre_scale_init),
    )
    add_parameter(
        self,
        'bias',
        variable([num_features], dtype=dtype, initializer=init.zeros),
    )

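# --- Illustrative sketch (assumption, not the original initialization) ------
# The docstring above says the first forward-pass input is used to initialize
# the variables when ``initialized=False``.  A standard data-dependent
# initialization (as in Glow) chooses `bias` and `pre_scale` so that
# ``y = (x + bias) * exp(pre_scale)`` has zero mean and unit variance per
# feature on that first batch.  The helper below is hypothetical and assumes
# the feature axis is the last one and ``scale == "exp"``.
def _demo_act_norm_init(x, epsilon=1e-5):
    import torch
    # x: [batch, ..., num_features]; reduce over every axis but the last
    reduce_axes = list(range(x.dim() - 1))
    mean = torch.mean(x, dim=reduce_axes)
    var = torch.var(x, dim=reduce_axes, unbiased=False)
    bias = -mean
    pre_scale = -0.5 * torch.log(torch.clamp(var, min=epsilon))
    # with these values, (x + bias) * exp(pre_scale) is standardized
    return bias, pre_scale
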
def __init__(self, seed_matrix, dtype=torch.float32):
    # orthogonalize the seed matrix via QR decomposition
    initial_matrix = la.qr(seed_matrix)[0]
    super().__init__(initial_matrix.shape[0])
    # register the orthogonal matrix (not the raw seed) as the free parameter
    add_parameter(self, 'matrix',
                  as_tensor(initial_matrix, dtype=dtype, force_copy=True))

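# --- Illustrative sketch (assumption, not the original methods) -------------
# With `matrix` stored as a free, unconstrained parameter, an invertible
# linear transform typically applies ``x @ matrix`` in the forward direction,
# takes log|det| from `torch.slogdet`, and inverts with `torch.inverse` (or a
# linear solve).  The helper below is hypothetical and only illustrates this.
def _demo_strict_invertible_matrix(x, matrix, inverse=False):
    import torch
    _, log_det = torch.slogdet(matrix)
    if inverse:
        return x @ torch.inverse(matrix), -log_det
    return x @ matrix, log_det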