def test_full_tt_svd():
    """Smoke-test TT-SVD on a large random matrix folded into a 6-way tensor.

    Factorizes a 4096x4096 float32 matrix (reshaped to 6 modes) with
    tolerance 0.1, further compresses the two largest TT cores with
    randomized CP, reconstructs the tensor by chained contractions, and
    prints parameter counts plus absolute/relative reconstruction errors.
    """
    np.random.seed(1234)
    W = np.random.rand(4096, 4096).astype(np.float32)
    tensor = Tensor(W, from_matrix=True, d=6)
    Gs = tensor.tt_factorization(0.1)
    # np.prod over the shape replaces the original manual product loop,
    # which shadowed the builtin `sum`; printed value is identical.
    print('original parameters', int(np.prod(tensor.T.shape)))
    tt_params = 0
    for g in Gs:
        tt_params += int(np.prod(g.shape))
        print(g.shape)
    print('tt parameters', tt_params)
    tt = Gs[0]
    print(len(Gs))
    for i in range(1, len(Gs)):
        if i == 1 or i == 2:
            # Compress the two (largest) interior cores further with
            # randomized CP; rank heuristic = largest mode / 3.
            r = max(Gs[i].shape) // 3
            t = Tensor(Gs[i])
            cp_rand = t.cp_rand(r, init='random', ret_tensors=False)
            tt = np.tensordot(tt, cp_rand, [len(tt.shape) - 1, 0])
        else:
            # Contract the running reconstruction with the next core
            # over the shared bond dimension (last axis <-> first axis).
            tt = np.tensordot(tt, Gs[i], [len(tt.shape) - 1, 0])
    print('tt-svd error', tensor.frobenius_norm(tensor.T - tt))
    print('tt-svd error', tensor.relative_error(tt))
def test_tt():
    """Round-trip a random 4-way tensor through TT-SVD and report the error.

    Decomposes a (3, 5, 100, 200) float32 tensor with tolerance 0.01 via
    the 'svd' method, rebuilds it by contracting consecutive cores over
    their shared bond index, and prints the relative reconstruction error.
    """
    A = np.random.rand(3, 5, 100, 200).astype(np.float32)
    src = Tensor(A)
    cores = src.tt_factorization(0.01, 'svd')
    for core in cores:
        print(core.shape)
    # Fold the core chain back into a full tensor: contract the trailing
    # bond axis of the accumulator with the leading axis of each core.
    reconstructed = cores[0]
    for core in cores[1:]:
        reconstructed = np.tensordot(reconstructed, core,
                                     [len(reconstructed.shape) - 1, 0])
    print(reconstructed.shape)
    print(f'tt-svd error: {src.relative_error(reconstructed.reshape(A.shape))} %')
def decompose_conv_layer(self, layer, rk=None):
    """Replace a Conv2d layer with a factorized equivalent.

    For ``self.factorization == 'cp'`` the kernel is CP-decomposed into a
    chain of four convolutions (1x1 channel-mix, (d,1) depthwise, (1,d)
    depthwise, 1x1 channel-mix), following the Lebedev et al. CP scheme.
    For ``'tt'`` the kernel is TT-factorized and wrapped in a TTConvLayer.

    Args:
        layer: the ``nn.Conv2d`` to decompose.
        rk: CP rank; defaults to ``max(kernel shape) // 3``.

    Returns:
        A list of replacement layers.

    Raises:
        ValueError: if ``self.factorization`` is neither 'cp' nor 'tt'.
    """
    weights = layer.weight.data
    weight_tensor = Tensor(weights)
    if self.verbose:
        logger.info(f'before shape: {weights.shape}')
    if self.factorization == 'cp':
        if rk is None:
            rk = max(weight_tensor.T.shape) // 3
        lambdas, Us = weight_tensor.cp_rand(rk, ret_tensors=True)
        if self.verbose:
            # Rebuild the full kernel from the rank-1 terms to report the
            # approximation error (diagnostic only).
            decomposed = np.zeros(weight_tensor.T.shape)
            for i in range(rk):
                tmp = lambdas[i] * Us[0][:, i]
                for j in range(1, len(Us)):
                    tmp = np.multiply.outer(tmp, Us[j][:, i])
                decomposed += tmp
            logger.info('============error============')
            logger.info(
                f'cp_rand error: {weight_tensor.frobenius_norm(weight_tensor.T - decomposed)}'
            )
            logger.info(
                f'relative error: {weight_tensor.relative_error(decomposed)}'
            )
            logger.info(f'original shape: {weight_tensor.T.shape}')
            for u in Us:
                logger.info(f'factor shape: {u.shape}')
        # Factor order matches kernel axes: (t, s, y, x).
        f_t, f_s, f_y, f_x = Us
        # Fold the CP weights into one factor so the others stay unscaled.
        f_x = f_x * lambdas
        t = weights.shape[0]  # num of filters
        d = layer.kernel_size[0]  # kernel size
        # NOTE(review): assumes a square kernel — d is reused for both the
        # (d, 1) and (1, d) factors; confirm kernel_size[0] == kernel_size[1].
        s = weights.shape[1]  # num of channels
        # Pipeline: s -> y -> x -> t
        k_s_layer = nn.Conv2d(in_channels=s,
                              out_channels=rk,
                              kernel_size=1,
                              padding=0,
                              stride=1,
                              dilation=layer.dilation,
                              bias=False)
        k_y_layer = nn.Conv2d(in_channels=rk,
                              out_channels=rk,
                              kernel_size=(d, 1),
                              stride=1,
                              padding=(layer.padding[0], 0),
                              dilation=layer.dilation,
                              groups=rk,
                              bias=False)
        # Fix: x-direction padding must use padding[1] (was padding[0],
        # wrong for asymmetric padding).
        k_x_layer = nn.Conv2d(in_channels=rk,
                              out_channels=rk,
                              kernel_size=(1, d),
                              stride=layer.stride,
                              padding=(0, layer.padding[1]),
                              dilation=layer.dilation,
                              groups=rk,
                              bias=False)
        # Fix: only give the final 1x1 conv a bias when the original layer
        # had one — an unconditional bias=True injected a fresh, randomly
        # initialized bias into bias-free layers.
        k_t_layer = nn.Conv2d(in_channels=rk,
                              out_channels=t,
                              kernel_size=1,
                              padding=0,
                              stride=1,
                              dilation=layer.dilation,
                              bias=layer.bias is not None)
        if layer.bias is not None:
            k_t_layer.bias.data = layer.bias.data
        if self.verbose:
            logger.info('after shape: ' +
                        str((f_s.shape, f_y.shape, f_x.shape, f_t.shape)))
        # Reshape each factor into Conv2d weight layout (out, in/groups, kH, kW).
        f_s = np.reshape(f_s.T, [rk, s, 1, 1])
        f_y = np.reshape(f_y.T, [rk, 1, d, 1])
        f_x = np.reshape(f_x.T, [rk, 1, 1, d])
        f_t = np.reshape(f_t, [t, rk, 1, 1])
        k_s_layer.weight.data = tensor(f_s).float()
        k_y_layer.weight.data = tensor(f_y).float()
        k_x_layer.weight.data = tensor(f_x).float()
        k_t_layer.weight.data = tensor(f_t).float()
        new_layers = [k_s_layer, k_y_layer, k_x_layer, k_t_layer]
        return new_layers
    elif self.factorization == 'tt':
        Gs = weight_tensor.tt_factorization(0.01)
        # Reconstruct the kernel from the TT cores for diagnostics.
        tt = Gs[0]
        for i in range(1, len(Gs)):
            tt = np.tensordot(tt, Gs[i], [len(tt.shape) - 1, 0])
        if self.verbose:
            logger.info(f'original shape: {weight_tensor.T.shape}')
            # np.prod replaces the original manual product loops, which
            # shadowed the builtin `sum`.
            logger.info(
                f'original parameters: {int(np.prod(weight_tensor.T.shape))}')
            tt_params = 0
            for g in Gs:
                tt_params += int(np.prod(g.shape))
                logger.info(g.shape)
            logger.info(f'tt parameters: {tt_params}')
            logger.info(
                f'tt shape {tt.reshape(weight_tensor.T.shape).shape}')
            logger.info(
                f'tt-svd error {weight_tensor.frobenius_norm(weight_tensor.T - tt.reshape(weight_tensor.T.shape))}'
            )
            logger.info(
                f'tt-svd error {weight_tensor.relative_error(tt.reshape(weight_tensor.T.shape))}'
            )
        tt_layer = TTConvLayer(in_channels=layer.in_channels,
                               out_channels=layer.out_channels,
                               kernel_size=layer.kernel_size,
                               weights=Gs,
                               stride=layer.stride,
                               padding=layer.padding,
                               dilation=layer.dilation,
                               groups=layer.groups,
                               bias=True,
                               padding_mode=layer.padding_mode)
        return [tt_layer]
    else:
        raise ValueError('Not supported decomposition for this layer ')