def test_sinc_filters():
    """Check the output shape of ``SincConv`` for one and two input channels.

    A batch of 50 signals with 400 samples each is passed through 128
    filters (kernel size 101, stride 1); the output is expected to have
    shape ``(50, 128, 300)`` regardless of the number of input channels.
    """
    batch_size, num_filters, num_samples = 50, 128, 400
    expected_shape = torch.Size([batch_size, num_filters, 300])

    # Single-channel input first, then the multichannel case.
    for num_channels in (1, 2):
        sinc = SincConv(
            in_channels=num_channels,
            out_channels=num_filters,
            kernel_size=101,
            stride=1,
            fs=16000,
        )
        signal = torch.randn(
            [batch_size, num_channels, num_samples], requires_grad=True
        )
        output = sinc(signal)
        assert output.shape == expected_shape
def _create_sinc_convs(self):
    """Build the sinc front-end and store it in ``self.blocks``.

    The resulting ``torch.nn.Sequential`` contains, in order:

    * ``SincConvBlock``: ``SincConv`` filters (also kept as
      ``self.filters``), log compression, batch norm and average pooling.
    * ``DConvBlock1``: connects the sinc output to the front-end "body".
    * ``DConvBlock2`` .. ``DConvBlock4``: stacked convolutional blocks.
    * ``DConvBlock5``: coupling block towards the encoder.

    All ``DConvBlock*`` entries are produced by ``self.gen_lsc_block``.
    """
    sinc_channels = 128
    self.filters = SincConv(
        self.in_channels,
        sinc_channels,
        kernel_size=101,
        stride=1,
        fs=self.fs,
        window_func=self.windowing_type,
        scale_type=self.scale_type,
    )

    stages = OrderedDict()
    stages["SincConvBlock"] = torch.nn.Sequential(
        OrderedDict(
            [
                ("Filters", self.filters),
                ("LogCompression", LogCompression()),
                ("BatchNorm", torch.nn.BatchNorm1d(sinc_channels, affine=True)),
                ("AvgPool", torch.nn.AvgPool1d(2)),
            ]
        )
    )

    # First convolutional block, connects the sinc output to the body.
    bridge_channels = 128
    stages["DConvBlock1"] = self.gen_lsc_block(
        sinc_channels,
        bridge_channels,
        depthwise_kernel_size=25,
        depthwise_stride=2,
        pointwise_groups=0,
        avgpool=True,
        dropout_probability=0.1,
    )

    # Second convolutional block: several layers; only the first one
    # changes the channel count (bridge_channels -> self.out_channels).
    width_in = bridge_channels
    width_out = self.out_channels
    for layer in range(2, 5):
        stages[f"DConvBlock{layer}"] = self.gen_lsc_block(
            width_in,
            width_out,
            depthwise_kernel_size=9,
            depthwise_stride=1,
        )
        width_in = width_out

    # Third convolutional block, acts as coupling to the encoder.
    stages["DConvBlock5"] = self.gen_lsc_block(
        width_in,
        self.out_channels,
        depthwise_kernel_size=7,
        depthwise_stride=1,
        pointwise_groups=0,
    )

    self.blocks = torch.nn.Sequential(stages)
def _band_edges_hz(params, sample_rate):
    """Return rounded integer (low, high) band edges scaled by ``sample_rate``.

    ``params[:, 0]`` is used as the lower edge and the absolute difference
    ``params[:, 1] - params[:, 0]`` as the band width.
    """
    low = np.abs(params[:, 0])
    high = low + np.abs(params[:, 1] - params[:, 0])
    # The builtin ``int`` replaces ``np.int``, which was removed in NumPy 1.24.
    return (
        np.round(low * sample_rate).astype(int),
        np.round(high * sample_rate).astype(int),
    )


def _save_current_figure(args, img_name):
    """Save the current pyplot figure into ``args.out_folder`` and report it."""
    img_path = str(args.out_folder / img_name)
    plt.savefig(img_path, bbox_inches="tight")
    print("Plotted %s" % img_path)


def plot_filter_kernels(filters: torch.Tensor, sample_rate: int, args):
    """Plot the Sinc filter kernels.

    The learned kernels are compared against the kernels of a freshly
    initialised ``SincConv(1, 128, 101)`` layer (hard-coded settings).

    Args:
        filters (torch.Tensor): Filter parameters. A NumPy array is
            accepted as well.
        sample_rate (int): Sample rate of Signal.
        args: Object with output options, read via attribute access:
            ``all`` (plot every single filter if true), ``filetype``
            (image file extension) and ``out_folder`` (path-like object
            supporting the ``/`` operator).
    """
    from espnet2.layers.sinc_conv import SincConv

    print("When plotting filter kernels, make sure the script has the"
          " correct SincConv settings (currently hard-coded).")
    convs = SincConv(1, 128, 101)

    # Work on a NumPy copy for the frequency maths so that the function
    # behaves the same for tensor and array input.
    if isinstance(filters, torch.Tensor):
        filter_params = filters.detach().cpu().numpy()
    else:
        filter_params = np.asarray(filters)

    # unlearned
    convs._create_filters(convs.f.device)
    pre_kernels = convs.sinc_filters.detach().numpy()
    pre_F_mins, pre_F_maxs = _band_edges_hz(
        convs.f.detach().numpy(), sample_rate
    )

    # learned
    convs.f = torch.nn.Parameter(torch.Tensor(filters))
    convs._create_filters(convs.f.device)
    kernels = convs.sinc_filters.detach().numpy()
    F_mins, F_maxs = _band_edges_hz(filter_params, sample_rate)
    # np.clip with a float bound yields floats; cast back to int because the
    # values are used as slice indices below.
    nyquist = sample_rate / 2.0
    F_mins = np.clip(F_mins, 0, nyquist).astype(int)
    F_maxs = np.clip(F_maxs, 0, nyquist).astype(int)

    x_f = np.linspace(0.0, np.max(F_maxs), int(np.max(F_maxs)) + 1)
    x = np.arange(kernels.shape[2])

    if args.all:
        for i in range(len(kernels)):
            tag = str(i).zfill(2)
            pre_kernel = pre_kernels[i][0]
            kernel = kernels[i][0]

            # Kernel before training.
            plt.clf()
            plt.xticks([])
            plt.yticks([])
            plt.plot(x, pre_kernel)
            _save_current_figure(
                args, "filter_pre_kernel_%s.%s" % (tag, args.filetype)
            )

            # Kernel after training.
            plt.clf()
            plt.xticks([])
            plt.yticks([])
            plt.plot(x, kernel)
            _save_current_figure(
                args, "filter_kernel_%s.%s" % (tag, args.filetype)
            )

            # Both kernels overlaid.
            plt.clf()
            plt.xlabel("kernel index")
            plt.plot(x, kernel)
            plt.plot(x, pre_kernel, "--", alpha=0.5)
            _save_current_figure(
                args, "filter_kernel_both_%s.%s" % (tag, args.filetype)
            )

            # Pass band of the learned filter drawn as a rectangle.
            y = np.zeros_like(x_f)
            y[F_mins[i]:F_maxs[i]] = 1.0
            plt.clf()
            plt.plot(x_f, y)
            _save_current_figure(
                args, "filter_freq_%s.%s" % (tag, args.filetype)
            )

            # Learned and initial pass bands overlaid.
            pre_y = np.zeros_like(x_f)
            pre_y[pre_F_mins[i]:pre_F_maxs[i]] = 1.0
            plt.clf()
            plt.plot(x_f, y)
            plt.plot(x_f, pre_y)
            _save_current_figure(
                args, "filter_freq_both_%s.%s" % (tag, args.filetype)
            )

    # 2x2 overview of four hand-picked filters (indices are hard-coded and
    # require at least 127 filters).
    plt.clf()
    selected = [32, 71, 113, 126]
    fig, axs = plt.subplots(2, 2, sharex=True, sharey="row")
    for ax, idx in zip(axs.flat, selected):
        ax.plot(x, kernels[idx][0])
        ax.plot(x, pre_kernels[idx][0], "--", alpha=0.5)
    _save_current_figure(args, "filter_kernel_ensemble2.%s" % (args.filetype))
    plt.close(fig)
def test_sinc_filter_output_size():
    """Check that ``SincConv.get_odim`` maps an input size of 400 to 300.

    The layer uses one input channel, 128 output channels and a kernel
    size of 101.
    """
    input_size, expected_size = 400, 300
    layer = SincConv(in_channels=1, out_channels=128, kernel_size=101)
    assert layer.get_odim(input_size) == expected_size