def song(self, path):
        """Split a song into music and vocal tracks, save both and show the plot.

        At most the first 120 seconds of the audio at ``self.path`` are loaded.
        A nearest-neighbour median filter over the magnitude spectrogram gives
        an estimate that is capped by the full spectrogram. Soft masks built
        from that estimate and from the residual weight the spectrogram into a
        background (music) part and a foreground (vocal) part. Each part is
        inverted with Griffin-Lim, written as a WAV file, plotted, and the plot
        image is added to ``self.ui1.widget_song``.

        NOTE(review): the ``path`` argument is never read -- the file is
        loaded from ``self.path``. Confirm that this is intended.
        """
        plot_file = 'plot_results/song/song_separation_plot.png'
        background_margin, foreground_margin = 2, 10
        mask_power = 2

        samples, rate = librosa.load(self.path, duration=120)
        magnitude, _phase = librosa.magphase(librosa.stft(samples))

        # Filter width is the frame count returned for a time of 2.
        filter_width = int(librosa.time_to_frames(2, sr=rate))
        repeating = librosa.decompose.nn_filter(
            magnitude, aggregate=np.median, metric='cosine', width=filter_width)
        # The filtered estimate may never exceed the original magnitude.
        repeating = np.minimum(magnitude, repeating)
        residual = magnitude - repeating

        background_mask = librosa.util.softmask(
            repeating, background_margin * residual, power=mask_power)
        foreground_mask = librosa.util.softmask(
            residual, foreground_margin * repeating, power=mask_power)

        # Background is inverted before foreground, matching the write order.
        music = librosa.griffinlim(background_mask * magnitude)
        vocal = librosa.griffinlim(foreground_mask * magnitude)

        scipy.io.wavfile.write('sound_results/song/music.wav', rate, music)
        scipy.io.wavfile.write('sound_results/song/vocal.wav', rate, vocal)
        utl.plotSounds([music, vocal], ["music", "vocal"], rate, plot_file)

        pixmap = pg.QtGui.QPixmap(plot_file)
        self.ui1.widget_song.addItem(pg.QtGui.QGraphicsPixmapItem(pixmap))
        self.ui1.widget_song.invertY(True)
        self.alarm("Check Plot & Sound Results Files")
示例#2
0
def spectrogram_to_audio(data, data_recon, output_dir):
    """Invert an input/reconstruction spectrogram pair to audio and save both.

    Each tensor has dimension 0 squeezed, is moved to the CPU, detached and
    converted to NumPy before being passed to Griffin-Lim. The results are
    written with a hard-coded sample rate of 16000.

    Args:
        data: Tensor holding the input spectrogram.
        data_recon: Tensor holding the reconstructed spectrogram.
        output_dir: String prefix; ``_input_.wav`` / ``_output_.wav`` are
            appended to it to form the two file names.

    Returns:
        tuple: ``(source_aud_path, target_aud_path)``.
    """
    cpu = torch.device("cpu")

    def _invert(spec):
        # Tensor -> NumPy array on the CPU -> waveform.
        return librosa.griffinlim(spec.squeeze(0).to(cpu).detach().numpy())

    source_audio = _invert(data)
    target_audio = _invert(data_recon)

    source_aud_path = output_dir + '_input_' + '.wav'
    target_aud_path = output_dir + '_output_' + '.wav'

    for wav_path, audio in ((source_aud_path, source_audio),
                            (target_aud_path, target_audio)):
        librosa.output.write_wav(wav_path, audio, 16000)

    return source_aud_path, target_aud_path
示例#3
0
def img_to_audio(image=None, out_wav=None, sr=48000, hl=None, wl=None):
    """Treat an image as a magnitude spectrogram and write it out as audio.

    The image is read from the ``images`` directory next to this file, its
    colour channels are averaged, and the resulting 2-D array is inverted
    with 64 iterations of Griffin-Lim. The waveform is written to
    ``audio/<out_wav>``.

    Args:
        image: File name of the image; must contain ``".png"``.
        out_wav: File name of the output; must contain ``".wav"``.
        sr: Sample rate used when writing the output file.
        hl: Hop length passed to Griffin-Lim. The string ``"None"`` is
            treated as ``None``.
        wl: Window length passed to Griffin-Lim. The string ``"None"`` is
            treated as ``None``.

    Returns:
        None. On invalid arguments or an unreadable image a message is
        printed and nothing is written.
    """
    if image is None or ".png" not in image:
        print("Please Specify an image file! (e.g. my_image.png)")
        return None
    if out_wav is None or ".wav" not in out_wav:
        print("Please Specify an output file! (e.g. my_sound.wav)")
        return None

    # The literal string "None" is accepted as a stand-in for None.
    if wl == "None":
        wl = None
    if hl == "None":
        hl = None

    basepath = path.dirname(__file__)
    filepath = path.abspath(path.join(basepath, 'images', image))

    print(f"Image location: {filepath}")

    img = cv2.imread(filepath)
    # cv2.imread signals a missing or unreadable file by returning None
    # instead of raising; without this check `img.shape` would crash.
    if img is None:
        print(f"Could not read image: {filepath}")
        return None

    print(f"Read image of shape {img.shape}")

    # Collapse the colour channels into a single 2-D array.
    avg_img = np.mean(img, axis=2)

    snd = librosa.griffinlim(avg_img, n_iter=64, hop_length=hl, win_length=wl)

    print(f"Output audio with {len(snd)} samples and a sample rate of {sr} Hz")

    librosa.output.write_wav(f"audio/{out_wav}", snd, sr)
示例#4
0
    def test_griffinlim(self):
        """Check that ``F.griffinlim`` agrees with ``librosa.griffinlim``."""
        # NOTE: This test is flaky without a fixed random seed
        # See https://github.com/pytorch/audio/issues/382
        torch.random.manual_seed(42)
        waveform = torch.rand((1, 1000))

        # Spectrogram settings shared by both implementations.
        n_fft, ws, hop = 400, 400, 100
        window = torch.hann_window(ws)
        normalize = False

        # Griffin-Lim settings.
        momentum, n_iter, length = 0.99, 8, 1000
        rand_init = False
        init = None if not rand_init else 'random'

        spec = F.spectrogram(waveform, 0, window, n_fft, hop, ws, 2, normalize)
        specgram = spec.sqrt()

        ta_out = F.griffinlim(specgram, window, n_fft, hop, ws, 1, normalize,
                              n_iter, momentum, length, rand_init)

        librosa_kwargs = dict(n_iter=n_iter,
                              hop_length=hop,
                              momentum=momentum,
                              init=init,
                              length=length)
        lr_out = librosa.griffinlim(specgram.squeeze(0).numpy(), **librosa_kwargs)
        # Restore the leading dimension so both outputs have the same shape.
        lr_out = torch.from_numpy(lr_out).unsqueeze(0)

        self.assertTrue(torch.allclose(ta_out, lr_out, atol=5e-5))
示例#5
0
 def get_audio_from_stft_spectrogram_GL(self, stft_features):
     """Invert ``stft_features`` to a waveform with Griffin-Lim.

     The window length, hop length (``self.overlap``) and window come from
     the instance. ``init=None`` and ``center=True`` are passed explicitly.
     """
     griffinlim_options = dict(init=None,
                               win_length=self.window_length,
                               hop_length=self.overlap,
                               window=self.window,
                               center=True)
     return librosa.griffinlim(stft_features, **griffinlim_options)
    def test_griffinlim(self, momentum):
        """Compare ``F.griffinlim`` with ``librosa.griffinlim`` for ``momentum``."""
        # FFT params
        n_fft = 400
        win_length = n_fft
        hop_length = n_fft // 4
        power = 1
        window = torch.hann_window(win_length, device=self.device)
        # GriffinLim params
        n_iter = 8

        waveform = get_whitenoise(device=self.device, dtype=self.dtype)
        n_samples = waveform.size(1)
        specgram = get_spectrogram(
            waveform, n_fft=n_fft, hop_length=hop_length, power=power,
            win_length=win_length, window=window)

        result = F.griffinlim(
            specgram,
            window=window,
            n_fft=n_fft,
            hop_length=hop_length,
            win_length=win_length,
            power=power,
            n_iter=n_iter,
            momentum=momentum,
            length=n_samples,
            rand_init=False)

        reference = librosa.griffinlim(
            specgram[0].cpu().numpy(),
            n_iter=n_iter,
            hop_length=hop_length,
            momentum=momentum,
            init=None,
            length=n_samples)
        # Add a leading axis so the reference lines up with `result`.
        expected = reference[None, ...]

        self.assertEqual(result, torch.from_numpy(expected), atol=5e-5, rtol=1e-07)
示例#7
0
def griffin_lim_(spc, n_fft, n_shift, win_length, window='hann', n_iters=100):
    """Reconstruct a waveform from a linear spectrogram with Griffin-Lim.

    Args:
        spc (ndarray): Linear spectrogram (T, n_fft // 2 + 1).
        n_fft (int): Number of FFT points; only used to validate ``spc``.
        n_shift (int): Shift size in points, used as the hop length.
        win_length (int): Window length in points.
        window (str, optional): Window function type.
        n_iters (int, optional): Number of Griffin-Lim iterations.

    Returns:
        ndarray: Reconstructed waveform (N,).

    """
    # The second axis must hold exactly n_fft // 2 + 1 frequency bins.
    expected_bins = n_fft // 2 + 1
    assert spc.shape[1] == expected_bins

    # librosa gets the transposed array, so bins come first.
    magnitude = np.abs(spc.T)
    return librosa.griffinlim(S=magnitude,
                              n_iter=n_iters,
                              hop_length=n_shift,
                              win_length=win_length,
                              window=window)
 def inverse_stft_griffin_lim(self, stft_mat, name):
     """Invert ``abs(stft_mat)`` with Griffin-Lim, save it and return the audio.

     The output file name is built from ``self.output_path``, the
     ``time_for_output`` hyper-parameter, ``self.output_name`` and ``name``.
     The file is written with ``self.h_params.down_sample_rate``.
     """
     prefix = self.output_path + self.h_params.time_for_output + "_griffin_"
     filename = prefix + self.output_name + "_" + name + ".wav"

     magnitude = abs(stft_mat)
     waveform = librosa.griffinlim(
         magnitude,
         hop_length=self.h_params.stft_hop_length,
         win_length=self.h_params.stft_window_size)

     sf.write(filename, waveform, self.h_params.down_sample_rate)
     return waveform
示例#9
0
def spec_wav(specgram, filename):
    """Invert a spectrogram tensor with Griffin-Lim and write it to ``filename``.

    The tensor is moved to the CPU, detached and converted to NumPy, then
    mapped through ``exp(x) - 1`` (the inverse of ``log(1 + x)``) before
    inversion. The result is written with a hard-coded sample rate of 22050.

    Args:
        specgram: Tensor holding the spectrogram.
        filename: Destination path of the WAV file.
    """
    magnitude = np.exp(specgram.cpu().detach().numpy()) - 1

    # The original also built a random phase array and re-ran an STFT on the
    # output, but neither result was used; both were removed as dead work.
    audio = librosa.griffinlim(magnitude)
    librosa.output.write_wav(filename, audio, sr=22050)
def audio_reconstruction_test(src_dir, dest_dir, ext=".png", size=None):
    """
    Test different approaches to image to audio conversion

    Runs one item through a save-as-image / reload / optional-resize round
    trip, dumping each intermediate array as CSV into ``dest_dir``, and
    finally inverts the result to audio with Griffin-Lim. Only the first
    path is processed because the loop body ends with ``break``.

    NOTE(review): ``dest_dir`` is combined with file names by plain string
    concatenation, so it presumably needs a trailing path separator --
    confirm against callers.

    :param src_dir: image directory (items are read with ``np.load``, so
        presumably they are saved NumPy arrays -- TODO confirm)
    :param dest_dir: audio directory; also receives the CSV and image dumps
    :param ext: image type
    :param size: desired dimension for resizing; when truthy the reloaded
        image is resized to ``(size, size)``
    """
    paths = prep_utils.get_unprocessed_items(src_dir=src_dir,
                                             dest_dir=dest_dir)

    start_time = time.time()
    for path in paths:
        prep_utils.display_progress_eta(current_item=path,
                                        total_items=paths,
                                        start_time=start_time)

        # Load the stored array, show it, and dump it as CSV.
        # waitKey(0) blocks until a key is pressed in the image window.
        S = np.load(path)
        cv2.imshow("image", S)
        cv2.waitKey(0)
        pd.DataFrame(S).to_csv(dest_dir + "S.csv", header=None, index=False)

        # Same again for the brightness-adjusted version.
        S_scaled = prep_utils.increase_brightness(S)
        cv2.imshow("image", S_scaled)
        cv2.waitKey(0)
        pd.DataFrame(S_scaled).to_csv(dest_dir + "S_scaled.csv",
                                      header=None,
                                      index=False)

        # Round-trip through an image file on disk.
        out_path = dest_dir + "gray" + ext
        cv2.imwrite(out_path, S_scaled)

        # Flag 0 reads the file back as a single-channel grayscale image.
        S = cv2.imread(out_path, 0)
        S = np.array(S, dtype=np.float32)
        S_recovered = S

        if size:
            S_recovered = cv2.resize(S_recovered, (size, size),
                                     interpolation=cv2.INTER_CUBIC)
        pd.DataFrame(S_recovered).to_csv(dest_dir + "S_recovered.csv",
                                         header=None,
                                         index=False)
        out_path = dest_dir + "resized" + ext
        cv2.imwrite(out_path, S_recovered)

        # Re-read the recovered array from the CSV that was just written.
        S_audio = np.genfromtxt(dest_dir + "S_recovered.csv", delimiter=',')
        S_audio = np.array(S_audio, dtype=np.float32)
        # cv2.resize takes (width, height): the result has 1025 rows and 431
        # columns. NOTE(review): 1025 presumably matches 1 + n_fft / 2 for
        # an n_fft of 2048 -- confirm.
        S_audio = cv2.resize(S_audio, (431, 1025),
                             interpolation=cv2.INTER_CUBIC)

        y = librosa.griffinlim(S_audio)

        out = dest_dir + "s.wav"

        # Save reconstructed data
        scipy.io.wavfile.write(out, 22050, y)
        # Stop after the first item.
        break
示例#11
0
def inv_linear_spectrogram(linear_spectrogram):
    """Convert a linear spectrogram in dB back to a waveform.

    The input is denormalized first when ``hparams.signal_normalization`` is
    set. ``hparams.ref_level_db`` is then added, the result is converted from
    dB to amplitude, raised to the power 1.55 and inverted with Griffin-Lim.
    """
    db = (_denormalize(linear_spectrogram)
          if hparams.signal_normalization
          else linear_spectrogram)

    # Convert back to linear amplitude.
    amplitude = _db_to_amp(db + hparams.ref_level_db)

    return librosa.griffinlim(amplitude**1.55)
示例#12
0
def get_spectrogram(wav):
    """Return the STFT magnitude of librosa's bundled ``'trumpet'`` example.

    NOTE(review): ``wav`` is never read -- the function always loads the
    ``'trumpet'`` example recording. Confirm whether it should analyse
    ``wav`` instead.

    The original also ran Griffin-Lim and a plain inverse STFT on the
    magnitude and discarded both results; that unused work was removed. The
    returned value is unchanged.

    Returns:
        The absolute value of the STFT of the example recording.
    """
    y, _sr = librosa.load(librosa.ex('trumpet'))
    # Get the magnitude spectrogram
    return np.abs(librosa.stft(y))
示例#13
0
def inv_spectrogram(spectrogram):
    '''Turn a normalized dB spectrogram into a waveform using librosa.

    The spectrogram is denormalized, offset by ``hparams.spec_ref_level_db``,
    converted from dB to amplitude, raised to ``hparams.power`` and inverted
    with Griffin-Lim. The result is passed through ``inv_preemphasis``.
    '''
    level_db = _denormalize(spectrogram) + hparams.spec_ref_level_db
    # Convert back to linear
    amplitude = _db_to_amp(level_db)
    waveform = librosa.griffinlim(amplitude**hparams.power,
                                  hop_length=hparams.hop_size,
                                  win_length=hparams.fft_wsize)
    return inv_preemphasis(waveform)
示例#14
0
def inv_mel_spectrogram(mel_spectrogram):
    """Convert a mel spectrogram in dB back to a waveform.

    The input is denormalized first when ``hparams.signal_normalization`` is
    set. ``hparams.ref_level_db`` is then added, the result is converted from
    dB to amplitude and from mel to linear, raised to the power 1.55 and
    inverted with Griffin-Lim.
    """
    db = (_denormalize(mel_spectrogram)
          if hparams.signal_normalization
          else mel_spectrogram)

    amplitude = _db_to_amp(db + hparams.ref_level_db)
    # Convert back to linear
    linear = _mel_to_linear(amplitude)

    return librosa.griffinlim(linear**1.55)
def inverse_spectrogram(s, mel=False):
    """Convert log-magnitude spectrogram to waveform.

    Args:
        s: Spectrogram in dB.
        mel: When true, ``s`` is mapped from mel to STFT magnitude before
            inversion.

    Returns:
        The reconstructed waveform divided by its largest sample value.

    NOTE(review): the division uses the largest signed sample, not the
    largest absolute value. This is kept as in the original; confirm
    whether peak normalization was intended.
    """
    S = db_to_amplitude(s)
    wf = ms_to_frames(hp.stft_window_ms)
    hf = ms_to_frames(hp.stft_shift_ms)
    if mel:
        S = librosa.feature.inverse.mel_to_stft(
            S, power=1, sr=hp.sample_rate, n_fft=hp.num_fft)
    y = librosa.griffinlim(S ** hp.griffin_lim_power,
                           n_iter=hp.griffin_lim_iters,
                           hop_length=hf,
                           win_length=wf)
    if hp.use_preemphasis:
        y = deemphasis(y)
    # ndarray.max() gives the same value as the builtin max() but avoids
    # iterating over every sample in Python.
    y /= y.max()
    return y
示例#16
0
def griffin_lim(level,audio,args=1):
    """Round-trip ``audio`` through an STFT magnitude and Griffin-Lim.

    The number of Griffin-Lim iterations is interpolated linearly from 80
    (at ``level == 0``) down to 1 (at ``level == 100``) and truncated to an
    integer.

    Args:
        level: Position on the 0..100 scale that selects the iteration count.
        audio: Waveform to analyse and reconstruct.
        args: Unused.

    Returns:
        The waveform reconstructed from the magnitude spectrogram.
    """
    starting_w = 80.0
    ending_w = 1.0
    # Previously assigned but unused while the formula repeated the literal 100.
    levels = 100.0

    n_iter = starting_w + level * ((ending_w - starting_w) / levels)
    n_iter = int(round(n_iter, 4))

    S = np.abs(librosa.stft(audio))
    return librosa.griffinlim(S, n_iter=n_iter)
示例#17
0
def test_griffinlim_multi(y_multi):
    """Griffin-Lim with ``length`` set must return the input signal's shape."""
    signal, _sr = y_multi

    # Magnitude of the STFT is all Griffin-Lim gets to see.
    magnitude = np.abs(librosa.stft(signal))

    # Two iterations are enough for a shape check.
    reconstructed = librosa.griffinlim(magnitude, n_iter=2,
                                       length=signal.shape[-1])

    # Check the lengths
    assert np.allclose(signal.shape, reconstructed.shape)
示例#18
0
def _spec_to_wav(spec, sr=sample_rate, engine='librosa'):
    '''Invert ``spec`` to a waveform with the Griffin-Lim algorithm.

    ``engine`` selects the implementation: ``'librosa'`` uses the hop and
    window length from ``stft_params``; ``'torch'`` builds a ``GriffinLim``
    transform from ``stft_params`` and ``power``. Any other value raises
    ``ValueError``. ``sr`` is not used.
    '''
    if engine == 'torch':
        return tf.GriffinLim(**stft_params, power=power)(spec)

    if engine != 'librosa':
        raise ValueError(engine)

    return librosa.griffinlim(spec,
                              hop_length=stft_params['hop_length'],
                              win_length=stft_params['win_length'])
示例#19
0
def griffin_lim(
    spc: np.ndarray,
    n_fft: int,
    n_shift: int,
    win_length: int = None,
    window: Optional[str] = "hann",
    n_iter: Optional[int] = 32,
) -> np.ndarray:
    """Convert linear spectrogram into waveform using Griffin-Lim.

    With librosa 0.7.0 or newer the library's own ``griffinlim`` is used.
    Older versions fall back to an explicit loop that alternates between
    ``istft`` and ``stft``, starting from random phases.

    Args:
        spc: Linear spectrogram (T, n_fft // 2 + 1).
        n_fft: The number of FFT points.
        n_shift: Shift size in points.
        win_length: Window length in points.
        window: Window function type.
        n_iter: The number of iterations.

    Returns:
        Reconstructed waveform (N,).

    Raises:
        AssertionError: If the second axis of ``spc`` is not
            ``n_fft // 2 + 1`` long.

    """
    # assert the size of input linear spectrogram
    assert spc.shape[1] == n_fft // 2 + 1

    if V(librosa.__version__) >= V("0.7.0"):
        # use librosa's fast Griffin-Lim algorithm
        spc = np.abs(spc.T)
        y = librosa.griffinlim(
            S=spc,
            n_iter=n_iter,
            hop_length=n_shift,
            win_length=win_length,
            window=window,
            # centring is only enabled when there is more than one frame
            center=spc.shape[1] > 1,
        )
    else:
        # use slower version of Griffin-Lim algorithm
        logging.warning(
            "librosa version is old. use slow version of Grriffin-Lim algorithm."
            "if you want to use fast Griffin-Lim, please update librosa via "
            "`source ./path.sh && pip install librosa==0.7.0`."
        )
        # `np.complex` was only an alias of the builtin and has been removed
        # from NumPy; the builtin `complex` gives the same dtype.
        cspc = np.abs(spc).astype(complex).T
        # start from uniformly random phases
        angles = np.exp(2j * np.pi * np.random.rand(*cspc.shape))
        y = librosa.istft(cspc * angles, n_shift, win_length, window=window)
        for _ in range(n_iter):
            # keep the given magnitudes, take the phase of the current estimate
            angles = np.exp(
                1j
                * np.angle(librosa.stft(y, n_fft, n_shift, win_length, window=window))
            )
            y = librosa.istft(cspc * angles, n_shift, win_length, window=window)

    return y
示例#20
0
 def post_audio(self, y):
     """Turn ``y`` back into a waveform.

     ``y`` is converted to an array and exponentiated, mapped from mel to
     STFT magnitude with the instance's ``sr``, ``n_fft`` and ``power``, and
     finally inverted with Griffin-Lim.
     """
     mel = np.exp(np.array(y))
     stft_magnitude = librosa.feature.inverse.mel_to_stft(mel,
                                                          sr=self.sr,
                                                          n_fft=self.n_fft,
                                                          power=self.power)
     return librosa.griffinlim(stft_magnitude,
                               hop_length=self.hop_length,
                               win_length=self.win_length)
示例#21
0
def griffinlim_librosa(spectrogram, fs, hparams):
    """Invert ``spectrogram`` with librosa's Griffin-Lim and undo pre-emphasis.

    Hop and window lengths are given in milliseconds in ``hparams`` and are
    converted to sample counts using ``fs``.
    """

    def _ms_to_samples(milliseconds):
        # Truncates towards zero, like the int() conversion it wraps.
        return int(milliseconds / 1000 * fs)

    waveform = librosa.griffinlim(
        spectrogram,
        n_iter=hparams.griffin_lim_iters,
        hop_length=_ms_to_samples(hparams.hop_length_ms),
        win_length=_ms_to_samples(hparams.win_length_ms),
    )
    return inv_preemphasis(waveform, hparams)
def extract_audio(Z, feature, params):
    """Convert a feature matrix back to audio.

    If ``Z`` was normalized, unnormalize it before calling this function.

    Args:
        Z: Feature matrix to invert.
        feature: One of ``"Stft"``, ``"Mel"``, ``"Cqt"`` or ``"Mfcc"``
            (case-sensitive).
        params: Mapping holding the analysis settings that are read for the
            selected feature.

    Returns:
        ``(yhat, params["fft_sample_rate"])`` for a known feature. For any
        other ``feature`` an error is printed and ``-1`` is returned.

    NOTE(review): the ``-1`` return makes tuple-unpacking callers fail; an
    exception may be the better signal. Kept as-is for compatibility.
    """
    known_features = ("Stft", "Mel", "Cqt", "Mfcc")
    if feature not in known_features:
        print("Error: feature invalid")
        return -1

    if feature == "Stft":
        # undo log-magnitude scaling
        magnitude = librosa.db_to_amplitude(Z)
        # upsample
        magnitude = _upsample_fft(magnitude, params["fft_sample_rate"],
                                  params["stft_window_length"])
        yhat = librosa.griffinlim(magnitude,
                                  hop_length=params["stft_hop_length"])
    elif feature == "Mel":
        # undo log-power scaling
        mel_power = librosa.db_to_power(Z)
        yhat = librosa.feature.inverse.mel_to_audio(
            mel_power,
            sr=params["fft_sample_rate"],
            n_fft=params["stft_window_length"],
            hop_length=params["stft_hop_length"],
        )
    elif feature == "Cqt":
        # undo log-amplitude scaling
        cqt_magnitude = librosa.db_to_amplitude(Z)
        yhat = librosa.griffinlim_cqt(
            cqt_magnitude,
            sr=params["fft_sample_rate"],
            hop_length=params["stft_hop_length"],
            fmin=librosa.note_to_hz(params["cqt_min_frequency"]),
        )
    else:
        # Only "Mfcc" is left at this point.
        yhat = librosa.feature.inverse.mfcc_to_audio(
            Z,
            n_mels=params["frequency_bins"],
            sr=params["fft_sample_rate"],
            n_fft=params["stft_window_length"],
            hop_length=params["stft_hop_length"],
        )

    return yhat, params["fft_sample_rate"]
示例#23
0
def mel_to_sound(dst_sound_file_path,
                 mel_spectrogram,
                 sample_rate,
                 method='griffin_lim'):
    '''
    Convert a mel spectrogram to a sound file with the given sample rate.

    The mel spectrogram is mapped to an STFT magnitude and inverted with the
    Griffin-Lim algorithm:
    https://paperswithcode.com/method/griffin-lim-algorithm

    :param dst_sound_file_path: path of the WAV file to write
    :param mel_spectrogram: mel spectrogram to invert
    :param sample_rate: sample rate written to the WAV file
    :param method: currently unused; Griffin-Lim is always applied
    '''
    inverted_features = librosa.feature.inverse.mel_to_stft(mel_spectrogram)
    audio_signal = librosa.griffinlim(inverted_features)
    # scipy's signature is write(filename, rate, data); the original call
    # passed the data and the rate in swapped positions.
    scipy.io.wavfile.write(dst_sound_file_path, sample_rate, audio_signal)
示例#24
0
def griffinlim_sp(spectrogram, n_fft, win_length, hop_length):
    """Invert ``spectrogram`` with Griffin-Lim and scale the peak to 1.

    ``win_length`` defaults to ``n_fft`` and ``hop_length`` to a quarter of
    the window length when passed as ``None``. Griffin-Lim runs for 50
    iterations with a momentum of 0.5.
    """
    win_length = n_fft if win_length is None else win_length
    hop_length = win_length // 4 if hop_length is None else hop_length

    waveform = librosa.griffinlim(
        spectrogram,
        n_iter=50,
        hop_length=hop_length,
        win_length=win_length,
        momentum=0.5,
    )
    # Divide by the largest absolute sample.
    peak = np.max(np.abs(waveform))
    return waveform / peak
示例#25
0
 def amp_sp_to_raw(amp_sp: np.array,
                   fs: int,
                   hop_size_ms: int = 5,
                   preemphasis: float = 0.00):
     """
     Reconstruct a waveform from an amplitude spectrum using Griffin-Lim.

     The amplitude spectrum must still carry the pitch information. A
     spectrum extracted with pitch-aligned windows (as WORLD does it) will
     not work.

     The spectrum is transposed and multiplied by the square root of the
     size of its second axis. The hop length is ``hop_size_ms`` converted
     to samples at ``fs``. The result is passed through
     ``AudioProcessing.depreemphasis`` with ``preemphasis``.
     """
     scaled_spectrum = amp_sp.T * np.sqrt(amp_sp.shape[1])
     hop_length = int(fs * hop_size_ms / 1000.)
     waveform = librosa.griffinlim(scaled_spectrum, hop_length=hop_length)
     return AudioProcessing.depreemphasis(waveform, preemphasis)
示例#26
0
    def synthesize(self, mel):
        """Turn a mel spectrogram into a waveform.

        ``mel`` is denormalized with ``_denormalize_from_VC``, mapped to an
        STFT magnitude using the settings in ``Config``, and inverted with
        Griffin-Lim.
        """
        denormalized = self._denormalize_from_VC(mel)

        mel_settings = dict(sr=Config.audio_sr,
                            n_fft=Config.n_fft,
                            fmin=Config.fmin,
                            fmax=Config.fmax)
        stft_magnitude = librosa.feature.inverse.mel_to_stft(denormalized,
                                                             **mel_settings)

        return librosa.griffinlim(stft_magnitude,
                                  win_length=Config.win_length,
                                  hop_length=Config.hop_length)
示例#27
0
 def spec2audio(cls,
                spec: np.ndarray,
                n_iter: int = 60,
                enhancement_factor: float = 1.5) -> np.ndarray:
     """Convert a magnitude spectrogram into a waveform with Griffin-Lim.

     The spectrogram is denormalized, offset by
     ``AudioProcessParam.ref_level_db``, converted from dB to linear and
     raised to ``enhancement_factor``. It is then inverted and passed
     through ``cls.de_emphasize``.
     """
     level_db = cls.denormalize(spec) + AudioProcessParam.ref_level_db
     linear = cls.db2lin(level_db)
     # Raising the magnitudes to a power sharpens the spectrum.
     enhanced = np.power(linear, enhancement_factor)
     waveform = librosa.griffinlim(enhanced,
                                   n_iter=n_iter,
                                   hop_length=cls.param.hop_length)
     return cls.de_emphasize(waveform)
示例#28
0
def griffinlim(*targets):
    """Coroutine stage that inverts received spectrograms with Griffin-Lim.

    Every value sent into the generator is inverted and the waveform is
    forwarded to each object in ``targets`` via ``send``. When the generator
    is closed, every target is closed as well. Each target is printed
    before it receives a waveform.
    """
    try:
        print("Griffinlim booted")
        while True:
            magnitude = (yield)
            # Invert using Griffin-Lim
            waveform = librosa.griffinlim(magnitude)
            for sink in targets:
                print(sink)
                sink.send(waveform)
    except GeneratorExit:
        # Propagate the shutdown down the pipeline.
        print("Griffinlim shutdown")
        for sink in targets:
            sink.close()
示例#29
0
 def load_vocal_audio(self, y, sr):
     """Extract the foreground (vocal) part of ``y`` and return it with ``sr``.

     A nearest-neighbour median filter over the magnitude spectrogram gives
     an estimate that is capped by the full spectrogram. A soft mask built
     from the residual against ten times that estimate weights the
     spectrogram, and the masked spectrogram is inverted with Griffin-Lim.
     """
     foreground_margin = 10
     mask_power = 2

     magnitude, _phase = librosa.magphase(librosa.stft(y))

     filter_width = int(librosa.time_to_frames(2, sr=sr))
     repeating = librosa.decompose.nn_filter(magnitude,
                                             aggregate=np.median,
                                             metric='cosine',
                                             width=filter_width)
     # The filtered estimate may never exceed the original magnitude.
     repeating = np.minimum(magnitude, repeating)

     foreground_mask = librosa.util.softmask(magnitude - repeating,
                                             foreground_margin * repeating,
                                             power=mask_power)
     vocals = librosa.griffinlim(foreground_mask * magnitude)
     return vocals, sr
示例#30
0
def spec_to_waveform(spec, min_level_db, ref_level_db, power, n_iter,
                     win_length, hop_length, preemphasis):
    """Convert output linear spec to waveform using griffin-lim vocoder.

    The spectrogram is clipped to [0, 1] and mapped onto
    ``[min_level_db, 0]``. It is then offset by ``ref_level_db``, converted
    from dB to linear scale and raised to ``power`` before Griffin-Lim is
    applied. When ``preemphasis`` is positive, the matching de-emphasis
    filter is run over the result.

    Args:
        spec (ndarray): the output linear spectrogram, shape(C, T), where C means n_fft, T means frames.
    """
    level_db = np.clip(spec, 0, 1) * (-min_level_db) + min_level_db
    linear = np.exp((level_db + ref_level_db) / 20 * np.log(10))

    wav = librosa.griffinlim(linear**power,
                             n_iter=n_iter,
                             hop_length=hop_length,
                             win_length=win_length)

    if not preemphasis > 0:
        return wav
    # IIR filter 1 / (1 - preemphasis * z^-1).
    return signal.lfilter([1.], [1., -preemphasis], wav)