Python draw_spectrogram示例，audio_utils.draw_spectrogram Python示例

示例#1

0

显示文件

    def _write_x_y(self, kwargs: Dict[str, ndarray],
                   step: int) -> mp.pool.AsyncResult:
        """ log the input (x) and the desired output (y), then cache them

        Side effects: sets ``self.snrseg_x``, ``self.y_scale`` and
        ``self.reused_sample``; when figures are drawn, also sets
        ``self.pad_min`` and ``self.kwargs_fig``.

        :param kwargs: must hold 'x', 'y', 'x_phase' and 'y_phase';
            'path_feature' is copied into the cache when present.
        :param step: step index forwarded to add_figure / add_audio.

        :return: pending result of ``calc_using_eval_module`` on the
            reconstructed y and x waves, submitted to
            ``self.pool_eval_module``.
        """
        # Keep only the last entry of the trailing axis (without dropping the
        # axis); the original author annotates the result as "F, T, 1".
        mag_in = kwargs['x'][..., -1:]
        mag_ref = kwargs['y'][..., -1:]
        phase_in = kwargs['x_phase']
        phase_ref = kwargs['y_phase']

        n_ref_frames = mag_ref.shape[1]
        self.snrseg_x = calc_snrseg(mag_ref, mag_in[:, :n_ref_frames, :])

        # Waveforms (annotated "T," by the original author).
        wav_in = reconstruct_wave(mag_in, phase_in)
        wav_ref = reconstruct_wave(mag_ref, phase_ref)

        # Dividing by this factor brings the peak of the y wave to 0.5.
        self.y_scale = np.abs(wav_ref).max() / 0.5

        # x wave is truncated to the length of the y wave before evaluation.
        pending_eval = self.pool_eval_module.apply_async(
            calc_using_eval_module,
            (wav_ref, wav_in[:wav_ref.shape[0]]),
        )

        if hp.draw_test_fig or self.group == 'train':
            # Smallest strictly positive value of y; used to pad y along
            # axis 1 up to the length of x.
            floor = mag_ref[mag_ref > 0].min()
            pad_shape = (
                mag_ref.shape[0],
                mag_in.shape[1] - n_ref_frames,
                mag_ref.shape[2],
            )
            self.pad_min = np.full(pad_shape, floor)

            limits = np.array((floor, mag_ref.max()))
            vmin, vmax = 20 * LogModule.log_(limits)
            self.kwargs_fig = {'vmin': vmin, 'vmax': vmax}

            fig_x = draw_spectrogram(mag_in)
            padded_ref = np.append(mag_ref, self.pad_min, axis=1)
            fig_y = draw_spectrogram(padded_ref)

            self.add_figure('1_Anechoic Spectrum', fig_y, step)
            self.add_figure('2_Reverberant Spectrum', fig_x, step)

        if hp.add_test_audio or self.group == 'train':
            self.add_audio('1_Anechoic Wave', wav_ref / self.y_scale, step)
            x_scale = np.abs(wav_in).max() / 0.5
            self.add_audio('2_Reverberant Wave', wav_in / x_scale, step)

        cache = {
            'x': mag_in,
            'y': mag_ref,
            'x_phase': phase_in,
            'y_phase': phase_ref,
            'x_wav': wav_in,
            'y_wav': wav_ref,
        }
        self.reused_sample = cache
        if 'path_feature' in kwargs:
            self.reused_sample['path_feature'] = kwargs['path_feature']
        return pending_eval

示例#2

0

显示文件

文件： train.py 项目： Sytronik/dereverberation-directional-feature

        def save_forward(module: nn.Module, in_: Tensor, out: Tensor):
            """ save the forward output of one module as figures and a .mat file

            The signature looks like a forward hook (module, input, output);
            presumably it is registered as one -- confirm against the caller.

            :param module: module whose output is dumped; the part of
                ``str(module)`` before the first '(' is used as its name.
            :param in_: unused.
            :param out: output tensor; detached, moved to CPU, converted to
                numpy and squeezed before saving.
            """
            name = str(module).split('(')[0]
            out_np = out.detach().cpu().numpy().squeeze()
            payload = {'out': out_np}

            # Running index distinguishing repeated modules of the same name.
            idx = module_counts[name]

            # One figure per entry along the first axis; the entry index is
            # passed as the step of the figure.
            for i_entry, entry in enumerate(out_np):
                tag = f'{group}/blockout_{i_iter}/{name}{idx}'
                save_forward.writer.add_figure(
                    tag,
                    draw_spectrogram(entry, to_db=False),
                    i_entry,
                )

            mat_path = logdir / f'blockout_{i_iter}_{name}{idx}.mat'
            scio.savemat(str(mat_path), payload)

            module_counts[name] += 1

示例#3

0

显示文件

    def test(self, loader, epoch: int):
        """ evaluate the checkpoint saved for `epoch` and save spectrograms

        Loads ``<log_dir>/<epoch>.pt`` into the model, runs every batch of
        `loader` through it, saves spectrogram images of the first
        post-processed sample of each batch under ``<log_dir>/test_<epoch>``,
        and accumulates the values returned by ``self.evaluate``.

        :param loader: iterable of batches; ``loader.dataset`` is passed to
            ``self.postprocess`` and its length normalizes the results.
        :param epoch: selects the checkpoint file and names the output
            directory.

        :return: ``(avg_eval_out, avg_eval_x)`` -- the accumulated
            ``self.evaluate`` results for 'out' and 'x', each divided by
            ``len(loader.dataset)``.
        """
        self.model.eval()
        state_dict = torch.load(Path(self.writer.log_dir, f'{epoch}.pt'))
        # A DataParallel wrapper keeps the real network in `.module`.
        target = (self.model.module
                  if isinstance(self.model, nn.DataParallel) else self.model)
        target.load_state_dict(state_dict)

        path_test_result = Path(self.writer.log_dir, f'test_{epoch}')
        os.makedirs(path_test_result, exist_ok=True)

        # (key in `spec` / file-name suffix, extra draw_spectrogram kwargs)
        figure_specs = (
            ('x', dict(vmin=-50, vmax=20)),
            ('y', dict(vmin=-50, vmax=20)),
            ('out', dict(vmin=-50, vmax=20)),
            ('err', dict(to_db=False, vmin=-20, vmax=20)),
        )

        avg_eval_out, avg_eval_x = 0., 0.

        pbar = tqdm(loader,
                    desc=f'test {epoch:3d}',
                    postfix='-',
                    dynamic_ncols=True)
        for data in pbar:
            xs, _, _ = self.preprocess(data)

            # Inference only: do not record autograd state for the forward
            # pass, which would otherwise hold activations in memory.
            with torch.no_grad():
                outs = self.model(xs).squeeze()

            data['out'] = outs
            data = self.postprocess(data, loader.dataset)

            # Only the first sample of each batch is drawn.
            sample = data[0]
            spec = wav2spec(sample)

            # File stem: `fname` without its last four characters
            # (presumably a 3-letter extension -- confirm against dataset).
            stem = sample['fname'][:-4]
            for key, fig_kwargs in figure_specs:
                fig = draw_spectrogram(spec[key], dpi=300, **fig_kwargs)
                fig.savefig(path_test_result / f'{stem}_{key}.png')

            # Release all figures so they do not pile up across batches.
            plt.close('all')

            avg_eval_out += self.evaluate(data, 'out')
            avg_eval_x += self.evaluate(data, 'x')

        n_samples = len(loader.dataset)
        avg_eval_out = avg_eval_out / n_samples
        avg_eval_x = avg_eval_x / n_samples

        return avg_eval_out, avg_eval_x

示例#4

0

显示文件

文件： tbXwriter.py 项目： aleksandryessin/denoising-wavenet-pytorch

    def write_one(self,
                  step: int,
                  out: ndarray = None,
                  **kwargs: ndarray) -> ndarray:
        """ write summary about one sample of output (and x and y optionally).

        When `kwargs` is given, x and y are processed, logged and cached on
        the instance (``reused_sample``, ``measure_x``, ``kwargs_fig``,
        ``xlim``, ``y_max``). When it is empty, those cached values from an
        earlier call are reused.

        :param step: step index forwarded to every add_* call.
        :param out: estimated waveform; must not be None.
        :param kwargs: keywords can be [x, y]

        :return: 2-row array; row 0 is [SNRseg, *eval values] of `out`,
            row 1 holds the same measures for x.
        """

        def wave_to_db(wave):
            # Magnitude STFT in dB, using the project-wide STFT settings.
            magnitude = np.abs(librosa.stft(wave, **hp.kwargs_stft))
            return librosa.amplitude_to_db(magnitude)

        assert out is not None

        if not kwargs:
            # Reuse what an earlier call with x / y has cached.
            assert self.reused_sample
            y = self.reused_sample['y']
            pad_one = self.reused_sample['pad_one']

            snrseg_x = self.measure_x['SNRseg']
            odict_eval_x = self.measure_x['odict_eval']
        else:
            raw_x, raw_y = kwargs['x'], kwargs['y']

            # Waveforms (annotated "T," by the original author): x is
            # averaged over its first axis, y is squeezed.
            x = raw_x.mean(0)
            y = raw_y.squeeze()

            if hp.do_bnkr_eq:
                x = bnkr_equalize_time(x)

            # x is truncated to the length of y before being measured.
            x_cut = x[:len(y)]
            snrseg_x = calc_snrseg_time(y, x_cut, hp.l_frame, hp.l_hop)
            odict_eval_x = calc_using_eval_module(y, x_cut)

            # Waveform figures share the axis limits stored on the instance.
            self.xlim = (0, len(x) / hp.fs)
            self.y_max = np.abs(y).max()
            wave_ylim = (-self.y_max * 1.4, self.y_max * 1.4)

            fig_x = draw_audio(x, hp.fs, xlim=self.xlim, ylim=wave_ylim)
            fig_y = draw_audio(y, hp.fs, xlim=self.xlim, ylim=wave_ylim)

            x_spec = wave_to_db(x)
            y_spec = wave_to_db(y)

            vmin, vmax = y_spec.min(), y_spec.max()
            # Pad y along axis 1 with its own minimum up to the width of x.
            n_pad = x_spec.shape[1] - y_spec.shape[1]
            pad_one = np.ones((y_spec.shape[0], n_pad))
            y_spec = np.append(y_spec, vmin * pad_one, axis=1)

            fig_x_spec = draw_spectrogram(x_spec, hp.fs, to_db=False)
            fig_y_spec = draw_spectrogram(y_spec, hp.fs, to_db=False)

            self.add_figure(
                f'{self.group}/1_Anechoic Spectrum', fig_y_spec, step)
            self.add_figure(
                f'{self.group}/2_Reverberant Spectrum', fig_x_spec, step)

            self.add_figure(f'{self.group}/4_Anechoic Wave', fig_y, step)
            self.add_figure(f'{self.group}/5_Reverberant Wave', fig_x, step)

            # Audio is normalized to a peak of 0.707 before being logged.
            audio_y = torch.from_numpy(y / self.y_max * 0.707)
            self.add_audio(f'{self.group}/1_Anechoic Wave',
                           audio_y,
                           step,
                           sample_rate=hp.fs)
            audio_x = torch.from_numpy(x / np.abs(x).max() * 0.707)
            self.add_audio(f'{self.group}/2_Reverberant Wave',
                           audio_x,
                           step,
                           sample_rate=hp.fs)

            self.reused_sample = {'x': x, 'y': y, 'pad_one': pad_one}
            self.measure_x = {'SNRseg': snrseg_x, 'odict_eval': odict_eval_x}
            self.kwargs_fig = {'vmin': vmin, 'vmax': vmax}

        out = out.squeeze()

        snrseg = calc_snrseg_time(y, out, hp.l_frame, hp.l_hop)
        odict_eval = calc_using_eval_module(y, out)

        fig_out = draw_audio(out,
                             hp.fs,
                             xlim=self.xlim,
                             ylim=(-self.y_max * 1.4, self.y_max * 1.4))

        # The estimate is padded with the stored vmin over the same columns
        # that were appended to y.
        out_spec = np.append(wave_to_db(out),
                             self.kwargs_fig['vmin'] * pad_one,
                             axis=1)
        fig_out_spec = draw_spectrogram(out_spec,
                                        hp.fs,
                                        to_db=False,
                                        **self.kwargs_fig)

        self.add_scalar(f'{self.group}/1_SNRseg/Reverberant', snrseg_x, step)
        self.add_scalar(f'{self.group}/1_SNRseg/Proposed', snrseg, step)
        # Scalar groups for the evaluation measures are numbered from 2.
        for number, measure in enumerate(odict_eval, start=2):
            self.add_scalar(f'{self.group}/{number}_{measure}/Reverberant',
                            odict_eval_x[measure], step)
            self.add_scalar(f'{self.group}/{number}_{measure}/Proposed',
                            odict_eval[measure], step)

        self.add_figure(f'{self.group}/3_Estimated Anechoic Spectrum',
                        fig_out_spec, step)
        self.add_figure(f'{self.group}/6_Estimated Anechoic Wave', fig_out,
                        step)

        self.add_audio(f'{self.group}/3_Estimated Anechoic Wave',
                       out / self.y_max * 0.707,
                       step,
                       sample_rate=hp.fs)

        row_out = [snrseg, *(odict_eval.values())]
        row_x = [snrseg_x, *(odict_eval_x.values())]
        return np.array([row_out, row_x])

示例#5

0

显示文件

    def write_one(self,
                  step: int,
                  out: ndarray = None,
                  eval_with_y_ph=False,
                  **kwargs: ndarray) -> ndarray:
        """ write summary about one sample of output (and x and y optionally).

        `out` is clamped to be non-negative IN PLACE, so the caller's array
        is modified.

        :param step: step index forwarded to every add_* call.
        :param out: estimated output; must not be None.
        :param eval_with_y_ph: if true, out reconstructed with true phase is
            evaluated.
        :param kwargs: keywords can be [x, y, x_phase, y_phase, path_feature];
            when non-empty they are first passed to ``self._write_x_y``.

        :return: float32 array with two rows; row 0 is [SNRseg, *eval values]
            of `out`, row 1 holds the same measures for x.
        """

        assert out is not None
        result_eval_x = None
        if kwargs:
            result_eval_x = self._write_x_y(kwargs, step)

        assert self.reused_sample
        cached = self.reused_sample
        y = cached['y']
        x_phase = cached['x_phase']
        y_phase = cached['y_phase']
        y_wav = cached['y_wav']

        # In-place clamp of negative values to zero.
        np.maximum(out, 0, out=out)

        # Wave rebuilt from the x phase (or a phase loaded from disk): needed
        # for evaluation without y phase, and for audio logging.
        out_wav = None
        if not eval_with_y_ph or hp.add_test_audio or self.group == 'train':
            if hp.use_das_phase:
                # The phase file path is derived from the feature path by
                # swapping the feature folder name and using a .npy suffix.
                path_feature = Path(cached['path_feature'])
                swapped = str(path_feature).replace(hp.feature,
                                                    hp.folder_das_phase)
                x_phase = np.load(Path(swapped).with_suffix('.npy'))

            out_wav = reconstruct_wave(
                out,
                x_phase[:, :out.shape[1]],
                n_iter=hp.n_gla_iter,
                momentum=hp.momentum_gla,
            )

        # Wave rebuilt from the y phase: needed for evaluation with y phase,
        # and for audio logging.
        out_wav_y_ph = (
            reconstruct_wave(out, y_phase)
            if eval_with_y_ph or hp.add_test_audio or self.group == 'train'
            else None
        )

        wav_to_eval = out_wav_y_ph if eval_with_y_ph else out_wav
        result_eval = self.pool_eval_module.apply_async(
            calc_using_eval_module, (y_wav, wav_to_eval))
        snrseg = calc_snrseg(y, out)

        if hp.draw_test_fig or self.group == 'train':
            padded_out = np.append(out, self.pad_min, axis=1)
            fig_out = draw_spectrogram(padded_out, **self.kwargs_fig)

            self.add_figure('3_Estimated Anechoic Spectrum', fig_out, step)

        if hp.add_test_audio or self.group == 'train':
            self.add_audio('3_Estimated Anechoic Wave', out_wav / self.y_scale,
                           step)
            self.add_audio('4_Estimated Wave with Anechoic Phase',
                           out_wav_y_ph / self.y_scale, step)

        self.add_scalar('1_SNRseg/Reverberant', self.snrseg_x, step)
        self.add_scalar('1_SNRseg/Proposed', snrseg, step)

        # Collect the asynchronous evaluations; the x result is refreshed
        # only when x / y were written in this call.
        if result_eval_x:
            self.dict_eval_x = result_eval_x.get()
        dict_eval = result_eval.get()
        # Scalar groups for the evaluation measures are numbered from 2.
        for number, measure in enumerate(dict_eval, start=2):
            self.add_scalar(f'{number}_{measure}/Reverberant',
                            self.dict_eval_x[measure], step)
            self.add_scalar(f'{number}_{measure}/Proposed',
                            dict_eval[measure], step)

        row_out = [snrseg, *dict_eval.values()]
        row_x = [self.snrseg_x, *self.dict_eval_x.values()]
        return np.array([row_out, row_x], dtype=np.float32)

示例#6

0

显示文件

    def write_one(self,
                  step: int,
                  out: ndarray = None,
                  eval_with_y_ph=False,
                  **kwargs: ndarray) -> ndarray:
        """ write summary about one sample of output (and x and y optionally).

        `out` is clamped to be non-negative IN PLACE, so the caller's array
        is modified.

        :param step: step index forwarded to every add_* call.
        :param out: estimated output; must not be None.
        :param eval_with_y_ph: determine if `out` is evaluated with `y_phase`
        :param kwargs: keywords can be [x, y, x_phase, y_phase]; when
            non-empty they are first passed to ``self.write_x_y``.

        :return: float32 array with two rows; row 0 is [SNRseg, *eval values]
            of `out`, row 1 holds the same measures for x.
        """

        assert out is not None
        result_eval_x = None
        if kwargs:
            result_eval_x = self.write_x_y(kwargs, step)

        assert self.reused_sample
        cached = self.reused_sample
        y = cached['y']
        x_phase = cached['x_phase']
        y_phase = cached['y_phase']
        y_wav = cached['y_wav']

        # In-place clamp of negative values to zero.
        np.maximum(out, 0, out=out)

        snrseg = calc_snrseg(y, out)

        # Two reconstructions: one from the (truncated) x phase, one from
        # the y phase.
        n_out_frames = out.shape[1]
        out_wav = reconstruct_wave(out,
                                   x_phase[:, :n_out_frames, :],
                                   n_iter=hp.n_glim_iter)
        out_wav_y_ph = reconstruct_wave(out, y_phase)

        wav_to_eval = out_wav_y_ph if eval_with_y_ph else out_wav
        result_eval = self.pool_eval_module.apply_async(
            calc_using_eval_module, (y_wav, wav_to_eval))

        if hp.draw_test_fig or self.group == 'train':
            padded_out = np.append(out, self.pad_min, axis=1)
            fig_out = draw_spectrogram(padded_out, **self.kwargs_fig)
            self.add_figure(f'{self.group}/3_Estimated Anechoic Spectrum',
                            fig_out, step)

        audio_out = torch.from_numpy(out_wav / self.y_scale)
        self.add_audio(f'{self.group}/3_Estimated Anechoic Wave',
                       audio_out,
                       step,
                       sample_rate=hp.fs)
        audio_out_y_ph = torch.from_numpy(out_wav_y_ph / self.y_scale)
        self.add_audio(f'{self.group}/4_Estimated Wave with Anechoic Phase',
                       audio_out_y_ph,
                       step,
                       sample_rate=hp.fs)

        self.add_scalar(f'{self.group}/1_SNRseg/Reverberant', self.snrseg_x,
                        step)
        self.add_scalar(f'{self.group}/1_SNRseg/Proposed', snrseg, step)

        # Collect the asynchronous evaluations; the x result is refreshed
        # only when x / y were written in this call.
        if result_eval_x:
            self.dict_eval_x = result_eval_x.get()
        dict_eval = result_eval.get()
        # Scalar groups for the evaluation measures are numbered from 2.
        for number, measure in enumerate(dict_eval, start=2):
            self.add_scalar(f'{self.group}/{number}_{measure}/Reverberant',
                            self.dict_eval_x[measure], step)
            self.add_scalar(f'{self.group}/{number}_{measure}/Proposed',
                            dict_eval[measure], step)

        row_out = [snrseg, *(dict_eval.values())]
        row_x = [self.snrseg_x, *(self.dict_eval_x.values())]
        return np.array([row_out, row_x], dtype=np.float32)