示例#1
0
def test_sinc_filters():
    """Check the SincConv output shape for one- and two-channel input."""
    expected_shape = torch.Size([50, 128, 300])
    # The single-channel case runs first, followed by the multichannel case.
    for n_channels in (1, 2):
        sinc = SincConv(
            in_channels=n_channels,
            out_channels=128,
            kernel_size=101,
            stride=1,
            fs=16000,
        )
        signal = torch.randn([50, n_channels, 400], requires_grad=True)
        output = sinc(signal)
        assert output.shape == expected_shape
示例#2
0
    def _create_sinc_convs(self):
        """Assemble the sinc front-end and store it in ``self.blocks``.

        The pipeline is an ordered ``torch.nn.Sequential`` made of a
        ``SincConvBlock`` (sinc filters, log compression, batch norm and
        average pooling) followed by five blocks produced by
        ``self.gen_lsc_block`` (``DConvBlock1`` .. ``DConvBlock5``).
        The sinc filter layer is additionally kept on ``self.filters``.
        """
        sinc_channels = 128

        # Sinc filter bank; also exposed directly as ``self.filters``.
        self.filters = SincConv(
            self.in_channels,
            sinc_channels,
            kernel_size=101,
            stride=1,
            fs=self.fs,
            window_func=self.windowing_type,
            scale_type=self.scale_type,
        )
        sinc_stage = OrderedDict()
        sinc_stage["Filters"] = self.filters
        sinc_stage["LogCompression"] = LogCompression()
        sinc_stage["BatchNorm"] = torch.nn.BatchNorm1d(
            sinc_channels, affine=True
        )
        sinc_stage["AvgPool"] = torch.nn.AvgPool1d(2)

        stages = OrderedDict()
        stages["SincConvBlock"] = torch.nn.Sequential(sinc_stage)

        # First convolutional block: links the sinc output to the "body".
        bridge_channels = 128
        stages["DConvBlock1"] = self.gen_lsc_block(
            sinc_channels,
            bridge_channels,
            depthwise_kernel_size=25,
            depthwise_stride=2,
            pointwise_groups=0,
            avgpool=True,
            dropout_probability=0.1,
        )

        # Second convolutional block: three stacked layers. Only the first
        # one sees the bridge width; the others map out_channels to itself.
        body_channels = self.out_channels
        prev_channels = bridge_channels
        for idx in (2, 3, 4):
            stages[f"DConvBlock{idx}"] = self.gen_lsc_block(
                prev_channels,
                body_channels,
                depthwise_kernel_size=9,
                depthwise_stride=1,
            )
            prev_channels = body_channels

        # Third convolutional block: couples the front-end to the encoder.
        stages["DConvBlock5"] = self.gen_lsc_block(
            prev_channels,
            self.out_channels,
            depthwise_kernel_size=7,
            depthwise_stride=1,
            pointwise_groups=0,
        )

        self.blocks = torch.nn.Sequential(stages)
示例#3
0
def _band_edges_hz(params, sample_rate):
    """Return rounded lower/upper band edges scaled by ``sample_rate``.

    The absolute value of column 0 of ``params`` is the lower edge; the
    absolute difference between columns 1 and 0 is added to it to obtain the
    upper edge. Both are multiplied by ``sample_rate`` and rounded (the
    result is still floating point; callers cast to int).
    """
    lower = np.abs(params[:, 0])
    upper = lower + np.abs(params[:, 1] - params[:, 0])
    return np.round(lower * sample_rate), np.round(upper * sample_rate)


def _save_current_figure(args, img_name):
    """Save the active pyplot figure to ``args.out_folder`` and report it."""
    img_path = str(args.out_folder / img_name)
    plt.savefig(img_path, bbox_inches="tight")
    print("Plotted %s" % img_path)


def plot_filter_kernels(filters: torch.Tensor, sample_rate: int, args):
    """Plot the Sinc filter kernels.

    A freshly constructed ``SincConv(1, 128, 101)`` provides the "unlearned"
    reference kernels; its ``f`` parameter is then replaced by ``filters`` to
    obtain the learned kernels. An ensemble figure comparing four hard-coded
    filters is always written; per-filter figures are written as well when
    ``args.all`` is set.

    Args:
        filters (torch.Tensor): Learned filter parameters, indexed as a 2-D
            array with (at least) two columns. A NumPy array is accepted too.
        sample_rate (int): Sample rate of Signal.
        args: Options object accessed by attribute (e.g. an
            ``argparse.Namespace``): ``all`` (plot every filter),
            ``filetype`` (image file extension) and ``out_folder``
            (path-like object supporting the ``/`` operator).
    """
    from espnet2.layers.sinc_conv import SincConv

    print("When plotting filter kernels, make sure the script has the"
          " correct SincConv settings (currently hard-coded).")
    convs = SincConv(1, 128, 101)

    # unlearned
    convs._create_filters(convs.f.device)
    pre_kernels = convs.sinc_filters.detach().numpy()
    pre_lower, pre_upper = _band_edges_hz(
        convs.f.detach().numpy(), sample_rate)
    # Builtin ``int`` replaces the ``np.int`` alias removed in NumPy 1.24.
    pre_F_mins, pre_F_maxs = pre_lower.astype(int), pre_upper.astype(int)

    # learned
    convs.f = torch.nn.Parameter(torch.Tensor(filters))
    convs._create_filters(convs.f.device)
    kernels = convs.sinc_filters.detach().numpy()

    # Do the frequency arithmetic on a plain NumPy array so that it works
    # the same whether a tensor or an array was passed in.
    if isinstance(filters, torch.Tensor):
        learned_params = filters.detach().cpu().numpy()
    else:
        learned_params = np.asarray(filters)
    lower, upper = _band_edges_hz(learned_params, sample_rate)
    # Clip to [0, sample_rate / 2] and only then cast to int: clipping
    # against a float bound yields floats, which cannot be used as slice
    # indices further below.
    half_rate = sample_rate / 2.0
    F_mins = np.clip(lower, 0, half_rate).astype(int)
    F_maxs = np.clip(upper, 0, half_rate).astype(int)

    x_f = np.linspace(0.0, np.max(F_maxs), int(np.max(F_maxs)) + 1)
    x = np.arange(kernels.shape[2])
    if args.all:
        for i, kernel_row in enumerate(kernels):
            tag = str(i).zfill(2)
            kernel = kernel_row[0]
            pre_kernel = pre_kernels[i][0]

            # Unlearned kernel, without axis ticks.
            plt.clf()
            plt.xticks([])
            plt.yticks([])
            plt.plot(x, pre_kernel)
            _save_current_figure(
                args, "filter_pre_kernel_%s.%s" % (tag, args.filetype))

            # Learned kernel, without axis ticks.
            plt.clf()
            plt.xticks([])
            plt.yticks([])
            plt.plot(x, kernel)
            _save_current_figure(
                args, "filter_kernel_%s.%s" % (tag, args.filetype))

            # Learned kernel with the unlearned one overlaid (dashed).
            plt.clf()
            plt.xlabel("kernel index")
            plt.plot(x, kernel)
            plt.plot(x, pre_kernel, "--", alpha=0.5)
            _save_current_figure(
                args, "filter_kernel_both_%s.%s" % (tag, args.filetype))

            # Pass band of the learned filter drawn as a 0/1 mask.
            y = np.zeros_like(x_f)
            y[F_mins[i]:F_maxs[i]] = 1.0
            plt.clf()
            plt.plot(x_f, y)
            _save_current_figure(
                args, "filter_freq_%s.%s" % (tag, args.filetype))

            # Learned and unlearned pass bands in the same figure.
            pre_y = np.zeros_like(x_f)
            pre_y[pre_F_mins[i]:pre_F_maxs[i]] = 1.0
            plt.clf()
            plt.plot(x_f, y)
            plt.plot(x_f, pre_y)
            _save_current_figure(
                args, "filter_freq_both_%s.%s" % (tag, args.filetype))

    plt.clf()
    # Hard-coded selection of filters shown in the 2x2 ensemble figure
    # (kept in a separate name so the ``filters`` argument is not shadowed).
    ensemble_ids = [32, 71, 113, 126]
    fig, axs = plt.subplots(2, 2, sharex=True, sharey="row")

    # ``axs.flat`` walks the grid row by row: (0,0), (0,1), (1,0), (1,1).
    for ax, idx in zip(axs.flat, ensemble_ids):
        ax.plot(x, kernels[idx][0])
        ax.plot(x, pre_kernels[idx][0], "--", alpha=0.5)

    _save_current_figure(args, "filter_kernel_ensemble2.%s" % (args.filetype))
    plt.close(fig)
示例#4
0
def test_sinc_filter_output_size():
    """Check that ``get_odim`` reports 300 for an input length of 400."""
    conv = SincConv(in_channels=1, out_channels=128, kernel_size=101)
    odim = conv.get_odim(400)
    assert odim == 300