def test_SimParam(self):
    """Check `SimpleParamStore` with several initializer kinds, then `set()`.

    Covers an int scalar, a float scalar, a NumPy array and an initializer
    callable. For the callable only the resulting shape is asserted.
    """
    init1 = 2
    init2 = 2.0
    init3 = np.array([3, 4, 5])
    # NOTE(review): spelled `kaming_normal` here; confirm it matches the
    # name actually exported by `initializer`.
    init4 = initializer.kaming_normal
    shape = [3]

    res1 = SimpleParamStore(shape, init1)
    res2 = SimpleParamStore(shape, init2)
    res3 = SimpleParamStore(shape, init3)
    res4 = SimpleParamStore([3, 5, 4, 4], init4)

    # Compare with abs(...) so the tolerance bounds the deviation in both
    # directions; a bare `a - b < tol` would accept any negative error.
    assert (abs(res1().data - as_tensor([2, 2, 2])) < 1e-6).all()
    assert (abs(res2().data - as_tensor([2.0, 2.0, 2.0])) < 1e-6).all()
    assert (abs(res3().data - as_tensor([3, 4, 5])) < 1e-6).all()
    assert list(res4().shape) == [3, 5, 4, 4]

    # `set()` must overwrite the stored value.
    res1.set([1, 2, 3])
    assert (abs(res1().data - as_tensor([1, 2, 3])) < 1e-6).all()
def __init__(self, seed_matrix, dtype=torch.float32, epsilon=1e-5):
    """Initialise the P/L/U/sign/log_s factors from `seed_matrix`.

    The seed is first replaced by the first factor of `la.qr(seed_matrix)`,
    which is then decomposed with `la.lu`. The resulting pieces are
    registered on the module via `add_buffer` / `add_parameter`.

    Args:
        seed_matrix: Matrix passed to `la.qr` to derive the initial matrix.
        dtype: dtype used for every registered tensor.
        epsilon: Lower bound applied to `|diag(U)|` before taking the log.
    """
    def to_tensor(array):
        # Every registered tensor uses the same dtype and is force-copied.
        return as_tensor(array, dtype=dtype, force_copy=True)

    q_factor = la.qr(seed_matrix)[0]
    super().__init__(q_factor.shape[0])
    shape = list(q_factor.shape)

    perm, lower, upper = la.lu(q_factor)
    diag = np.diag(upper)
    diag_sign = np.sign(diag)
    # Clamp the magnitude from below so the log stays finite.
    diag_log_abs = np.log(np.maximum(np.abs(diag), epsilon))
    # Upper-triangular matrix whose diagonal entries are zero.
    strict_upper = np.triu(upper, k=1)

    # Registration order is kept exactly as in the original sequence.
    add_buffer(self, 'P', to_tensor(perm))
    add_parameter(self, 'pre_L', to_tensor(lower))
    add_buffer(self, 'L_mask', to_tensor(np.tril(np.ones(shape), k=-1)))
    add_parameter(self, 'pre_U', to_tensor(strict_upper))
    add_buffer(self, 'U_mask', to_tensor(np.triu(np.ones(shape), k=1)))
    add_buffer(self, 'sign', to_tensor(diag_sign))
    add_parameter(self, 'log_s', to_tensor(diag_log_abs))
def test_weight_norm(self):
    """Check `NormedWeightStore` for every `norm_axis` of a (2, 3, 4) weight.

    For both the initial value and a value assigned through `set()`, the
    results of `get()`, `__call__` and the `v` attribute are compared with
    `value / norm_except_axis(value, axis=norm_axis, keepdims=True)`.
    """
    initial_value = np.random.randn(2, 3, 4)
    new_value = np.random.randn(2, 3, 4)

    def normalized(value, axis):
        # Reference result: the value divided by its norm over all axes
        # except `axis`.
        return as_tensor(value) / norm_except_axis(
            as_tensor(value), axis=axis, keepdims=True)

    for norm_axis in [-3, -2, -1, 0, 1, 2]:
        store = NormedWeightStore([2, 3, 4], initializer=initial_value,
                                  norm_axis=norm_axis)

        # abs(...) makes the tolerance two-sided; a bare `a - b < tol`
        # would accept any negative deviation.
        expected_value = normalized(initial_value, norm_axis)
        assert (abs(store.get().data - expected_value) < 1e-6).all()
        assert (abs(store() - expected_value) < 1e-6).all()
        assert (abs(store.v - expected_value) < 1e-6).all()

        store.set(as_tensor(new_value))
        expected_value = normalized(new_value, norm_axis)
        assert (abs(store.get() - expected_value) < 1e-6).all()
        assert (abs(store() - expected_value) < 1e-6).all()
        assert (abs(store.v - expected_value) < 1e-6).all()
def set(self, value: TensorOrData) -> None:
    """Overwrite `self.v` with the first output of decomposing `value`.

    `value` is converted to a tensor with the dtype and device of `self.v`,
    passed through `weight_norm_decompose` with `self.norm_axis` and
    `self.epsilon`, and the first returned item is assigned into `self.v`.
    The second returned item is discarded. Runs under `torch.no_grad()`.

    Args:
        value: New value to decompose and store.
    """
    with torch.no_grad():
        target = self.v
        converted = as_tensor(
            value,
            dtype=get_dtype(target),
            device=str(target.device),
        )
        decomposed = weight_norm_decompose(
            converted, self.norm_axis, self.epsilon)
        new_v = decomposed[0]
        assign_data(target, new_v)
def check_core_linear(ctx, input, layer_factory, layer_name, numpy_fn):
    """Compare layers built by `layer_factory` against `numpy_fn`.

    Checks, in order: a layer with bias, a layer without bias, and each
    accepted spelling of the `weight_norm` option (verifying the type of
    `layer.weight_store` that results).

    Args:
        ctx: Unused by this function.
        input: Input data fed both to the layer and to `numpy_fn`.
        layer_factory: Callable accepting `use_bias` and `weight_norm`
            keyword arguments and returning a layer.
        layer_name: Substring expected in `repr(layer)`.
        numpy_fn: Reference function called as `numpy_fn(input, weight, bias)`.
    """
    def assert_matches_reference(layer, with_bias=False):
        # NOTE(review): this comparison is one-sided (no abs), so it only
        # bounds how far the layer output may exceed the reference; confirm
        # whether a symmetric check was intended.
        weight = to_numpy(layer.weight_store())
        bias = to_numpy(layer.bias_store()) if with_bias else None
        actual = layer(as_tensor(input, dtype=torch.float32))
        reference = numpy_fn(input, weight, bias)
        assert (actual - as_tensor(reference) < 1e-6).all()

    # With bias: `use_bias=` must not show up in the repr.
    biased = layer_factory(use_bias=True)
    assert (layer_name in repr(biased))
    assert isinstance(biased.weight_store, SimpleParamStore)
    assert_matches_reference(biased, with_bias=True)
    assert ('use_bias=' not in repr(biased))

    # Without bias: the repr must state `use_bias=False`.
    unbiased = layer_factory(use_bias=False)
    assert isinstance(unbiased.weight_store, SimpleParamStore)
    assert_matches_reference(unbiased)
    assert ('use_bias=False' in repr(unbiased))

    # `weight_norm` spellings that must yield a normalising weight store.
    normed_cases = (
        ([True, WeightNormMode.FULL, 'full'], NormedAndScaledWeightStore),
        ([WeightNormMode.NO_SCALE, 'no_scale'], NormedWeightStore),
    )
    for modes, store_type in normed_cases:
        for wn in modes:
            normed = layer_factory(use_bias=False, weight_norm=wn)
            assert isinstance(normed.weight_store, store_type)
            assert_matches_reference(normed)

    # Spellings that disable weight norm: only the store type is checked.
    for wn in [False, WeightNormMode.NONE, 'none']:
        plain = layer_factory(use_bias=False, weight_norm=wn)
        assert isinstance(plain.weight_store, SimpleParamStore)
def test_NormedAndScaledWeightStore(self):
    """Check `NormedAndScaledWeightStore` for every `norm_axis` of a (2, 3, 4) weight.

    For both the initial value and a value assigned through `set()`:
    `get()` and `__call__` must reproduce the value, `g` must equal
    `norm_except_axis(value, norm_axis, keepdims=True)`, and `v` must equal
    `value / g`, all within 1e-3.
    """
    initial_value = np.random.randn(2, 3, 4)
    new_value = np.random.randn(2, 3, 4)

    for norm_axis in [-3, -2, -1, 0, 1, 2]:
        store = NormedAndScaledWeightStore([2, 3, 4],
                                           initializer=initial_value,
                                           norm_axis=norm_axis)

        # abs(...) makes the tolerance two-sided; a bare `a - b < tol`
        # would accept any negative deviation.
        assert (abs(store.get().data - initial_value) < 1e-3).all()
        assert (abs(store().data - initial_value) < 1e-3).all()
        assert (abs(store.g - norm_except_axis(
            as_tensor(initial_value), norm_axis, keepdims=True)) < 1e-3).all()
        assert (abs(store.v - as_tensor(initial_value) / store.g) < 1e-3).all()

        store.set(as_tensor(new_value))
        assert (abs(store.get() - as_tensor(new_value)) < 1e-3).all()
        assert (abs(store().data - new_value) < 1e-3).all()
        assert (abs(store.g - norm_except_axis(
            as_tensor(new_value), norm_axis, keepdims=True)) < 1e-3).all()
        assert (abs(store.v - as_tensor(new_value) / store.g) < 1e-3).all()
def __init__(self, seed_matrix, dtype=torch.float32):
    """Initialise the `matrix` parameter from an orthogonalised seed.

    Args:
        seed_matrix: Matrix passed to `la.qr`; the first returned factor
            (the orthogonal matrix) becomes the initial parameter value.
        dtype: dtype of the registered `matrix` parameter.
    """
    # Obtain the orthogonal matrix from the seed.
    initial_matrix = la.qr(seed_matrix)[0]
    super().__init__(initial_matrix.shape[0])
    # Register the orthogonalised matrix. Previously the raw `seed_matrix`
    # was registered here, which silently discarded the QR step above.
    add_parameter(
        self, 'matrix',
        as_tensor(initial_matrix, dtype=dtype, force_copy=True))
def set_bias(self, input: Tensor):
    """Store `input`, converted with `as_tensor`, as `self.bias`.

    Args:
        input: New bias value.
    """
    converted = as_tensor(input)
    self.bias = converted