def deconv_a_file(filename):

	song_id = filename.split('.')[0]

	path_out_here = path_results + song_id + '/'
	SRC = np.load(path_SRC + filename)
	if os.path.exists(path_out_here):
		print('%s might be done already; skipping.' % song_id)
		print('Remove %s to re-run.' % path_out_here)
		return
	os.makedirs(path_out_here)
	filename_out = '%s_a_original.wav' % song_id
	librosa.output.write_wav(path_out_here + filename_out, librosa.istft(SRC, hop_length=N_FFT // 2),
								sr=SAMPLE_RATE,
								norm=True)

	for depth in depths:
		print('--- deconvolve! ---')
		deconvedMASKS = auralise.get_deconve_mask(W[:depth], layer_names, SRC, depth)  # size can be smaller than SRC due to downsampling

		print('result: %d masks with size of %d, %d' % deconvedMASKS.shape)

		for deconved_feature_ind, deconvedMASK_here in enumerate(deconvedMASKS):

			MASK = np.zeros(SRC.shape)
			MASK[0:deconvedMASK_here.shape[0], 0:deconvedMASK_here.shape[1]] = deconvedMASK_here
			deconvedSRC = np.multiply(SRC, MASK)

			filename_out = '%s_deconved_from_depth_%d_feature_%d.wav' % (song_id, depth, deconved_feature_ind)
			librosa.output.write_wav(path_out_here + filename_out, librosa.istft(deconvedSRC, hop_length=N_FFT // 2), 
								sr=SAMPLE_RATE, 
								norm=True)
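
The pattern above, reduced to a minimal self-contained sketch (the test tone and the median-threshold mask are illustrative stand-ins, not from the original project):

import numpy as np
import librosa

sr = 22050
y = np.sin(2 * np.pi * 440.0 * np.arange(sr) / sr)       # 1-second test tone
D = librosa.stft(y, n_fft=2048, hop_length=1024)         # complex spectrogram
mask = (np.abs(D) > np.median(np.abs(D))).astype(float)  # keep the louder bins
y_masked = librosa.istft(D * mask, hop_length=1024)      # resynthesize the masked part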
Example #2
def audio_to_chroma_and_onset_strength(audio, fs = 22050, hop = 512):
  H,P = librosa.decompose.hpss(librosa.stft(audio))
  audio_harmonic = librosa.istft(H)
  audio_percussive = librosa.istft(P)
  chroma_gram = librosa.feature.chromagram(audio_harmonic)  # renamed librosa.feature.chroma_stft in later librosa releases
  audio_onset_strength = librosa.onset.onset_strength(audio_percussive, hop_length = hop // 4, sr = fs)
  return chroma_gram, audio_onset_strength
Example #3
def audio_to_cqt_and_onset_strength(audio, fs=22050, hop=512):
    '''
    Feature extraction for audio data.
    Gets a power CQT of harmonic component and onset strength signal of percussive.
    
    Input:
        audio - audio time series (1-d np.ndarray)
        fs - sampling rate of the audio, default 22050
        hop - hop length for cqt, default 512, onset strength hop will be 1/4 of this
    Output:
        audio_gram - CQT of audio data
        audio_onset_strength - onset strength signal
    '''
    # Use harmonic part for gram, percussive part for onsets
    H, P = librosa.decompose.hpss(librosa.stft(audio))
    audio_harmonic = librosa.istft(H)
    audio_percussive = librosa.istft(P)
    # Compute log-frequency spectrogram of original audio
    audio_gram = np.abs(librosa.cqt(y=audio_harmonic,
                                    sr=fs,
                                    hop_length=hop,
                                    fmin=librosa.midi_to_hz(36),
                                    n_bins=60))**2
    # Compute the onset strength signal at 4x the hop rate
    audio_onset_strength = librosa.onset.onset_strength(audio_percussive, hop_length=hop // 4, sr=fs)
    return audio_gram, audio_onset_strength
Example #4
def extend_dataset(y, sr):
	# Make 2x faster
	D      = librosa.stft(y, n_fft=2048, hop_length=512)

	D_fast = librosa.phase_vocoder(D, 2.0, hop_length=512)
	y_fast = librosa.istft(D_fast, hop_length=512)

	# Concatenate two 2x clips to roughly match the original length
	y_fast = np.append(y_fast, y_fast)

	# Make 2x slower
	D_slow = librosa.phase_vocoder(D, 0.5, hop_length=512)
	y_slow = librosa.istft(D_slow, hop_length=512)

	# Split the 0.5x (double-length) clip into two halves
	y_slow1, y_slow2 = np.array_split(y_slow, 2)  # array_split tolerates an odd sample count

	## Frequency scaling
	#y_pitch_up = librosa.effects.pitch_shift(y, sr, n_steps=4)
	#y_pitch_down = librosa.effects.pitch_shift(y, sr, n_steps=-4)

	# Trim everything to a common length
	samples = min([len(y), len(y_fast), len(y_slow1), len(y_slow2)])
	y = y[:samples]
	y_fast = y_fast[:samples]
	y_slow1 = y_slow1[:samples]
	y_slow2 = y_slow2[:samples]

	return (y, y_fast, y_slow1, y_slow2)
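
A hypothetical usage of the function above (the tone is synthetic; any mono signal works):

import numpy as np
import librosa

sr = 22050
y = np.sin(2 * np.pi * 440.0 * np.arange(2 * sr) / sr)  # two-second tone
y, y_fast, y_slow1, y_slow2 = extend_dataset(y, sr)      # four equal-length variants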
Example #5
def hpss(y):

    D = librosa.stft(y)
    H, P = librosa.decompose.hpss(D, kernel_size=KERNEL_SIZE, power=HPSS_P)

    D_harm = np.abs(librosa.stft(librosa.istft(H), n_fft=N_FFT, hop_length=HOP))
    D_perc = np.abs(librosa.stft(librosa.istft(P), n_fft=N_FFT, hop_length=HOP))

    return D_harm, D_perc
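
KERNEL_SIZE, HPSS_P, N_FFT and HOP are module-level settings in the source project; one plausible stand-in configuration (values are assumptions, not the project's) that makes the snippet runnable:

import numpy as np
import librosa

KERNEL_SIZE = 31   # median-filter width for librosa.decompose.hpss (assumed)
HPSS_P = 2.0       # soft-mask exponent (assumed)
N_FFT = 2048       # re-analysis FFT size (assumed)
HOP = 512          # re-analysis hop length (assumed)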
Example #6
def test_istft_bad_window():
    # The window below has n_fft // 2 samples, but istft expects win_length
    # (= n_fft by default) samples, so librosa rejects it with a ParameterError;
    # in the librosa test suite this function is decorated to expect that error.

    D = np.zeros((1025, 10), dtype=np.complex64)

    n_fft = 2 * (D.shape[0] - 1)

    window = np.ones(n_fft // 2)

    librosa.istft(D, window=window)
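
For contrast, a window array whose length matches win_length (n_fft by default) is accepted; a minimal sketch:

import numpy as np
import librosa

D = np.zeros((1025, 10), dtype=np.complex64)
n_fft = 2 * (D.shape[0] - 1)
window = np.hanning(n_fft)           # correct length: n_fft samples
y = librosa.istft(D, window=window)  # inverts without raising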
Example #7
def get_hpss(y, PARAMETERS):
    '''Separate harmonic and percussive audio time series'''   
    # Get the STFT
    D = librosa.stft(y, **PARAMETERS['stft'])
    
    
    # Get the HPSS
    D_h, D_p = librosa.decompose.hpss(D, **PARAMETERS['hpss'])

    y_h = librosa.istft(D_h, hop_length=PARAMETERS['stft']['hop_length'])
    y_p = librosa.istft(D_p, hop_length=PARAMETERS['stft']['hop_length'])
    
    return y_h, y_p
Example #8

def __test(infile):
    DATA    = load(infile)

    Dinv    = librosa.istft(DATA['D'],  n_fft       = DATA['nfft'][0,0].astype(int),
                                        hann_w      = DATA['hann_w'][0,0].astype(int),
                                        hop_length  = DATA['hop_length'][0,0].astype(int))
    assert numpy.allclose(Dinv, DATA['Dinv'])
Example #9
def istft(file,
          stft_mat,
          frame_length=1024,
          frame_shift=256,
          center=False,
          window="hann",
          transpose=True,
          norm=None,
          fs=16000,
          nsamps=None):
    if transpose:
        stft_mat = np.transpose(stft_mat)
    samps = audio_lib.istft(
        stft_mat,
        frame_shift,
        frame_length,
        window=window,
        center=center,
        length=nsamps)
    samps_norm = np.linalg.norm(samps, np.inf)
    # renormalize if a target peak level was given
    if norm:
        samps = samps * norm / samps_norm
    samps_int16 = (samps * MAX_INT16).astype(np.int16)
    fdir = os.path.dirname(file)
    if fdir and not os.path.exists(fdir):
        os.makedirs(fdir)
    audio_lib.output.write_wav(file, samps_int16, fs)
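
A hypothetical round trip through the writer above, assuming audio_lib is librosa and MAX_INT16 is the int16 full-scale value (both are module-level names in the source project):

import numpy as np
import librosa as audio_lib

MAX_INT16 = np.iinfo(np.int16).max  # assumed definition

y = 0.1 * np.random.randn(16000)    # one second of noise at fs=16000
mat = audio_lib.stft(y, n_fft=1024, hop_length=256, window="hann", center=False).T
istft("out/reconstructed.wav", mat, nsamps=len(y))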
Example #10
def compute_cqt(filename):
    a, sr = librosa.load(filename, sr=SR)
    spectrum = librosa.stft(a)
    harm_spec, _ = librosa.decompose.hpss(spectrum)
    harm = librosa.istft(harm_spec)
    cqt = np.abs(librosa.cqt(harm, sr=sr, hop_length=HOP, real=False))
    return cqt
Example #11
def stretch_demo(input_file, output_file, speed):
    '''Phase-vocoder time stretch demo function.

    :parameters:
      - input_file : str
          path to input audio
      - output_file : str
          path to save output (wav)
      - speed : float > 0
          speed up by this factor
    '''

    N_FFT       = 2048
    HOP_LENGTH  = N_FFT // 4

    # 1. Load the wav file, resample
    print('Loading', input_file)

    y, sr = librosa.load(input_file)

    # 2. generate STFT @ 2048 samples
    print('Computing short-time Fourier transform...')
    D = librosa.stft(y, n_fft=N_FFT, hop_length=HOP_LENGTH)

    print('Playing back at %.0f%% speed' % (speed * 100))
    D_stretch = librosa.phase_vocoder(D, speed, hop_length=HOP_LENGTH)

    y_stretch = librosa.istft(D_stretch, hop_length=HOP_LENGTH)

    print('Saving stretched audio to:', output_file)
    librosa.output.write_wav(output_file, y_stretch, sr)
Example #12
def mfcc_clustering(file_name, n_clusters):
    """
    From Prem
    :return:
    """

    clusterer = KMeans(n_clusters=n_clusters)

    print(file_name)
    mix, sr = librosa.load(file_name)
    mix_stft = librosa.stft(mix)
    comps, acts = find_template(mix_stft, sr, 100, 101, 0, mix_stft.shape[1])
    cluster_comps = librosa.feature.mfcc(S=comps)[1:14]
    save_mfcc_img(file_name[:-4] + "_mfcc.png", np.flipud(cluster_comps))
    clusterer.fit_transform(cluster_comps.T)
    labels = clusterer.labels_
    # print(labels)
    sources = []

    for cluster_index in range(n_clusters):
        indices = np.where(labels == cluster_index)[0]
        template, residual = extract_template(comps[:, indices], mix_stft)
        t = librosa.istft(template)
        sources.append(t)

    return np.array(sources)
Example #13
def decompose_into_harmonic_and_percussive(filepath, kernel_size=(7,15), n_fft = 4096, hop_length = 1024):
  """
  Performs Harmonic/Percussive Source Separation on an audio file by applying median filters and returns each filtered version 
  as an audio signal
  ARGS
    filepath: fullpath of audio file <str>
    kernel_size: tuple of (harmonic, percussive) median-filter sizes (<int>,<int>)
    n_fft: FFT size <int>
    hop_length : hop length <int>
  """
  signal, sr = load_signal(filepath)
  D = librosa.stft(signal, n_fft=n_fft, hop_length=hop_length)
  H, P = librosa.decompose.hpss(D, kernel_size=kernel_size)
  signal_harm = librosa.istft(H)
  signal_perc = librosa.istft(P)
  return signal_harm, signal_perc
Example #14
def find_template(music_stft, sr, min_t, n_components, start, end):
    """
    from Prem
    :param music_stft:
    :param sr:
    :param min_t:
    :param n_components:
    :param start:
    :param end:
    :return:
    """
    template_stft = music_stft[:, start:end]
    layer = librosa.istft(template_stft)
    layer_rms = np.sqrt(np.mean(layer * layer))

    comps = []
    acts = []
    errors = []

    for T in range(min_t, n_components):
        transformer = NMF(n_components=T)
        comps.append(transformer.fit_transform(np.abs(template_stft)))
        acts.append(transformer.components_)
        errors.append(transformer.reconstruction_err_)

    # knee = np.diff(errors, 2)
    # knee = knee.argmax() + 2
    knee = 0

    # print 'Using %d components' % (knee + min_t)
    return comps[knee], acts[knee]
Example #15
def percussive(y):
    '''Extract the percussive component of an audio time series'''

    D = librosa.stft(y)
    P = librosa.decompose.hpss(D)[1]
    
    return librosa.istft(P)
Example #16
def midi_to_cqt(midi, sf2_path=None, fs=22050, hop=512):
    '''
    Feature extraction routine for midi data, converts to a drum-free, percussion-suppressed CQT.
    
    Input:
        midi - pretty_midi.PrettyMIDI object
        sf2_path - path to .sf2 file to pass to pretty_midi.fluidsynth
        fs - sampling rate to synthesize audio at, default 22050
        hop - hop length for cqt, default 512
    Output:
        midi_gram - Simulated CQT of the midi data
    '''
    # Synthesize the MIDI using the supplied sf2 path
    midi_audio = midi.fluidsynth(fs=fs, sf2_path=sf2_path)
    # Use the harmonic part of the signal
    H, P = librosa.decompose.hpss(librosa.stft(midi_audio))
    midi_audio_harmonic = librosa.istft(H)
    # Compute log frequency spectrogram of audio synthesized from MIDI
    midi_gram = np.abs(librosa.cqt(y=midi_audio_harmonic,
                                   sr=fs,
                                   hop_length=hop,
                                   fmin=librosa.midi_to_hz(36),
                                   n_bins=60,
                                   tuning=0.0))**2
    return midi_gram
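
A hypothetical call, assuming pretty_midi and fluidsynth are installed and a MIDI file is at hand ('song.mid' is a placeholder):

import pretty_midi

midi = pretty_midi.PrettyMIDI('song.mid')
midi_gram = midi_to_cqt(midi, sf2_path=None)  # None falls back to pretty_midi's default soundfont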
Example #17
def reverse_channel(a, b, n_fft=2**13, win_length=2**12, hop_length=2**10):
    '''
    Estimates the channel distortion in b relative to a and reverses it
    
    :parameters:
        - a : np.ndarray
            Some signal
        - b : np.ndarray
            Some other signal with channel distortion relative to a
        - n_fft : int
            Number of samples in each FFT computation, default 2**13
        - win_length : int
            Number of samples in each window, default 2**12
        - hop_length : int
            Number of samples between successive FFT computations, default 2**10
    
    :returns:
        - b_filtered : np.ndarray
            The signal b, filtered to reduce channel distortion
    '''
    # Compute spectrograms
    a_spec = librosa.stft(a, n_fft=n_fft, win_length=win_length, hop_length=hop_length)
    b_spec = librosa.stft(b, n_fft=n_fft, win_length=win_length, hop_length=hop_length)
    # Compute the best filter
    H = best_filter_coefficients(a_spec, b_spec)
    # Apply it in the frequency domain (ignoring aliasing!  Yikes)
    b_spec_filtered = H*b_spec
    # Get back to time domain
    b_filtered = librosa.istft(b_spec_filtered, win_length=win_length, hop_length=hop_length)
    return b_filtered
Example #18

File: gotify.py  Project: rabitt/gotify
def render_audio(new_stft, sr, fpath, y_orig, mix=False):
    assert not np.any(np.isnan(new_stft))
    audio = librosa.istft(new_stft, hop_length=HOP)
    if mix:
        min_len = np.min([len(audio), len(y_orig)])
        audio = audio[:min_len] + y_orig[:min_len]
    librosa.output.write_wav(fpath, audio, sr)
Example #19
def get_freq_component(y, k=4):
    components, activations, phase = decompose(y)
    D_k = np.multiply.outer(components[:, k], activations[k])

    # invert the stft after putting the phase back in
    y_k = librosa.istft(D_k * phase)
    return y_k
Example #20
def reconstruct(components, activations, phase):
    # Play back the reconstruction
    # Rebuild the spectrogram from all components via the matrix product
    D_k = components.dot(activations)

    # invert the stft after putting the phase back in
    y_k = librosa.istft(D_k * phase)
    return y_k
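
An end-to-end sketch of how reconstruct() might be fed, using librosa's own NMF-based decomposition (the tone and component count are illustrative):

import numpy as np
import librosa

y = np.sin(2 * np.pi * 440.0 * np.arange(22050) / 22050.0)
S, phase = librosa.magphase(librosa.stft(y))
components, activations = librosa.decompose.decompose(S, n_components=8)
y_k = reconstruct(components, activations, phase)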
Example #21
def mix_by_chromagram(src_path, tgt_paths, n_fft = 4096, hop_length = 1024):
  print("create source sound")
  src_sound = Sound(src_path)
  targets = {}

  print("create target sounds")
  if isinstance(tgt_paths, list):
    for path in tgt_paths:
      targets[path] = Sound(path)
  else:
    targets[tgt_paths] = Sound(tgt_paths)

  # zeros chromagram
  zeros = src_sound.getChromagram()[0] * 0

  # TODO: cut all arrays such that they have the same length!
  print("create temporary magnitude and phase containers")
  tmp_mag = None
  tmp_phase = None

  ratio = len(src_sound.getSpectra().getMagnitude()) // len(src_sound.getChromagram()[0])
  print("block size", ratio)

  # Compute distances
  print("compute distances")
  for i in range(len(src_sound.getChromagram()[0]) - 1):
    print("computing frame block", i)
    distance = None
    closest = zeros
    for target in targets.values():
      try:
        new_dist = norm(np.transpose(target.getChromagram())[i] - np.transpose(src_sound.getChromagram())[i])
        if distance is None or new_dist < distance:
          distance = new_dist
          closest = target.spectra
      except IndexError:
        print('IDX Error')
    try:
      cap = min(len(closest.getMagnitude(i)), len(src_sound.spectra.getMagnitude(i)))
      # Add magnitudes and phases
      for j in range(ratio):
        if tmp_mag is None:
          tmp_mag = src_sound.spectra.getMagnitude(i*ratio + j)[:cap] + closest.getMagnitude(i*ratio + j)[:cap]
          tmp_phase = src_sound.spectra.getPhase(i*ratio + j)[:cap] + closest.getPhase(i*ratio + j)[:cap]
        else:
          tmp_mag = np.vstack((tmp_mag, src_sound.spectra.getMagnitude(i*ratio + j)[:cap] + closest.getMagnitude(i*ratio + j)[:cap]))
          tmp_phase = np.vstack((tmp_phase, src_sound.spectra.getPhase(i*ratio + j)[:cap] + closest.getPhase(i*ratio + j)[:cap]))
    except AttributeError:
      print('Attribute Error')

  # Average magnitudes and phases
  tmp_mag *= 0.5
  tmp_phase *= 0.5

  signal = librosa.istft(tmp_mag * tmp_phase)
  librosa.output.write_wav(src_sound.path[:-4] + "-mix.wav", signal, 2 * src_sound.sr)
Example #22

def specs_to_wavs_istft_batch(magnitudes, phases, hop_length):

    stft_matrices = combine_magnitdue_phase(magnitudes = magnitudes, phases = phases)

    wavs = list()
    for stft_matrix in stft_matrices:
        wav = librosa.istft(stft_matrix, hop_length = hop_length)
        wavs.append(wav)

    wavs = np.array(wavs)

    return wavs
Example #23
import numpy as np
import librosa
from tqdm import tqdm

def griffinlim(spectrogram, n_iter=50, window='hann', n_fft=2048, win_length=2048, hop_length=-1, verbose=False):
    if hop_length == -1:
        hop_length = n_fft // 4

    angles = np.exp(2j * np.pi * np.random.rand(*spectrogram.shape))

    t = tqdm(range(n_iter), ncols=100, mininterval=2.0, disable=not verbose)
    for i in t:
        full = np.abs(spectrogram).astype(np.complex128) * angles
        inverse = librosa.istft(full, hop_length = hop_length, win_length = win_length, window = window)
        rebuilt = librosa.stft(inverse, n_fft = n_fft, hop_length = hop_length, win_length = win_length, window = window)
        angles = np.exp(1j * np.angle(rebuilt))

        if verbose:
            diff = np.abs(spectrogram) - np.abs(rebuilt)
            t.set_postfix(loss=np.linalg.norm(diff, 'fro'))

    full = np.abs(spectrogram).astype(np.complex128) * angles
    inverse = librosa.istft(full, hop_length = hop_length, win_length = win_length, window = window)

    return inverse
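
A short usage sketch, reusing the imports above: discard the phase of a synthetic tone's spectrogram, then estimate it back (parameters mirror the function defaults):

y = np.sin(2 * np.pi * 220.0 * np.arange(22050) / 22050.0)
S = np.abs(librosa.stft(y, n_fft=2048, win_length=2048, hop_length=512))
y_rec = griffinlim(S, n_iter=32, hop_length=512)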
Example #24
def spectrogram2wav(mag, n_fft, win_length, hop_length, num_iters, phase_angle=None, length=None):
    assert(num_iters > 0)
    if phase_angle is None:
        phase_angle = np.pi * np.random.rand(*mag.shape)
    spec = mag * np.exp(1.j * phase_angle)
    for i in range(num_iters):
        wav = librosa.istft(spec, win_length=win_length, hop_length=hop_length, length=length)
        if i != num_iters - 1:
            spec = librosa.stft(wav, n_fft=n_fft, win_length=win_length, hop_length=hop_length)
            _, phase = librosa.magphase(spec)
            phase_angle = np.angle(phase)
            spec = mag * np.exp(1.j * phase_angle)
    return wav
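
A usage sketch for this variant (the phase is initialized randomly unless phase_angle is given; the tone is illustrative):

import numpy as np
import librosa

y = np.sin(2 * np.pi * 330.0 * np.arange(22050) / 22050.0)
mag = np.abs(librosa.stft(y, n_fft=1024, win_length=1024, hop_length=256))
wav = spectrogram2wav(mag, n_fft=1024, win_length=1024, hop_length=256, num_iters=30, length=len(y))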
Example #25

File: hpss.py  Project: BWalburn/librosa
def hpss_demo(input_file, output_harmonic, output_percussive):
    '''HPSS demo function.

    :parameters:
      - input_file : str
          path to input audio
      - output_harmonic : str
          path to save output harmonic (wav)
      - output_percussive : str
          path to save output percussive (wav)
    '''

    N_FFT       = 2048
    HOP_LENGTH  = N_FFT // 4

    # 1. Load the wav file, resample
    print('Loading', input_file)

    y, sr = librosa.load(input_file)

    # 2. generate STFT @ 2048 samples
    print('Computing short-time Fourier transform...')
    D = librosa.stft(y, n_fft=N_FFT, hop_length=HOP_LENGTH)

    # 3. HPSS.  The default kernel size isn't necessarily optimal, but works well enough
    print('Separating harmonics and percussives...')
    harmonic, percussive = librosa.decompose.hpss(D)

    # 4. Invert STFT
    print('Inverting harmonics and percussives...')
    y_harmonic   = librosa.istft(harmonic, hop_length=HOP_LENGTH)
    y_percussive = librosa.istft(percussive, hop_length=HOP_LENGTH)

    # 5. Save the results
    print('Saving harmonic audio to:', output_harmonic)
    librosa.output.write_wav(output_harmonic, y_harmonic, sr)

    print('Saving percussive audio to:', output_percussive)
    librosa.output.write_wav(output_percussive, y_percussive, sr)
Example #26
def decompose_save(filepath, kernel_size=(5,17), n_fft = 4096, hop_length = 1024):
  """
  Performs Harmonic/Percussive Source Separation on an audio file by applying median filters and saves each filtered file and
  a mix of them as an audio file.
  ARGS
    filepath: fullpath of audio file <str>
    kernel_size: tuple of (harmonic, percussive) median-filter sizes (<int>,<int>)
    n_fft: FFT size <int>
    hop_length : hop length <int>

  """
  signal, sr = load_signal(filepath)
  D = librosa.stft(signal, n_fft=n_fft, hop_length=hop_length)
  H, P = librosa.decompose.hpss(D, kernel_size=kernel_size)

  signal_harm = librosa.istft(H)
  signal_perc = librosa.istft(P)
  signal_mix = librosa.istft(D)

  librosa.output.write_wav(filepath[:-4]+"-harm.wav", signal_harm, sr)
  librosa.output.write_wav(filepath[:-4]+"-perc.wav", signal_perc, sr)
  librosa.output.write_wav(filepath[:-4]+"-mix.wav", signal_mix, sr)
Example #27

def extract_all_layers(music_path, parameters=None, n_components = 8, beats = None):
	music, sr, music_stft = load_file(music_path)
	if beats == 'quantize':
		beats = quantize_track(music, sr)
	original_rms = np.sqrt(np.mean(music*music))
	layers = []
	boundaries = []
	template, residual, errors, beats, inflection_point, beat, template_error, start = get_layer(music, sr, 0, beats=beats, parameters=parameters, n_components=n_components)
	while True:
		layers.append(librosa.istft(template))
		boundaries.append(beats[beat])
		print('LAYER: ' + str(len(layers)))
		if np.sqrt(np.mean(music*music)) < original_rms/5:
			print('Residual rms too low, terminating')
			break
		if beat >= len(beats)-8:
			print('Went to end of file, terminating')
			break
		music = librosa.istft(residual)
		start = beat
		template, residual, errors, beats, inflection_point, beat, template_error, start = get_layer(music, sr, start, beats, parameters=parameters)
	return layers, boundaries, music_stft
Example #28

def reconstruct_from_magnitude(self, stft_mag, it=100):
    n_fft = (stft_mag.shape[0] - 1) * 2
    x = np.random.randn((stft_mag.shape[1] - 1) * self.hop_length)
    for i in range(it):
        stft_rec = lbr.stft(x, n_fft=n_fft, hop_length=self.hop_length)
        angle = np.angle(stft_rec)
        my_stft = stft_mag * np.exp(1.0j * angle)
        if self.verbose:  # and i == it - 1:
            prev_x = x
        x = lbr.istft(my_stft, hop_length=self.hop_length)
        if self.verbose:  # and i == it - 1:
            mse = np.sqrt(np.square(x - prev_x).sum())  # log-MSE might be more appropriate?
            print('MSE between sub- and ultimate iteration: {}'.format(mse))
    return x
Example #29

def spec_to_wav_batch(stft_matrices, hop_length = None):

    # Every stft matrix in stft_matrices may contain complex numbers

    assert (stft_matrices.ndim == 3), 'For a single stft matrix, use librosa.istft() directly'

    wavs = list()

    for stft_matrix in stft_matrices:
        wav = librosa.istft(stft_matrix, hop_length = hop_length)
        wavs.append(wav)

    wavs = np.array(wavs)

    return wavs
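
A small usage sketch with a batch of two identical STFT matrices (values illustrative):

import numpy as np
import librosa

y = np.random.randn(4096)
D = librosa.stft(y, hop_length=256)
batch = np.stack([D, D])                        # shape (2, n_freq, n_frames)
wavs = spec_to_wav_batch(batch, hop_length=256)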
Example #30
    def __test(infile):
        DATA    = load(infile)

        if DATA['hann_w'][0,0] == 0:
            window      = np.ones
            win_length  = 2 * (DATA['D'].shape[0] - 1)
        else:
            window      = None
            win_length  = DATA['hann_w'][0,0]
            
        Dinv    = librosa.istft(DATA['D'],  hop_length  = DATA['hop_length'][0,0].astype(int),
                                            win_length  = win_length,
                                            window      = window)

        assert np.allclose(Dinv, DATA['Dinv'])
Example #31
def SaveAudio(file_path, mag, phase):
    y = librosa.istft(mag * phase, win_length=window_size, hop_length=hop_length)
    librosa.output.write_wav(file_path, y, SR, norm=True)
    print(file_path + " Save complete!!")
Example #32
def ac_tempogram(y: np.ndarray) -> np.ndarray:
    D = delta_spectral(y)**2
    D = librosa.istft(D, win_length=2048, hop_length=2048)
    return D.reshape(1025, -1)
Example #33
def decode(self, A):
    return librosa.istft(randomise_phase(self.comps.dot(A)))
Example #34
def create_audio_from_spectrogram(spec):
	spec_transposed = tf.transpose(spec).eval()
	return librosa.istft(spec_transposed, Config.hop_length)
Example #35
def _istft(y):
	return librosa.istft(y, hop_length=get_hop_size())
Example #36
model.compile(loss='mean_squared_error',  # hypothetical loss; the start of this call was lost in extraction
              optimizer=keras.optimizers.Adam(),
              metrics=['accuracy'])

model.fit(noisyInput, cleanOutput, epochs=20, validation_split=0.2)
#--------------------------------------------------------------------------------------------------------------------
#-------------------------------------------------------reconstruct--------------------------------------------------
#--------------------------------------------------------------------------------------------------------------------

reconstructed = model.predict(noisyInput)

reconstructed = reconstructed.reshape(reconstructed.shape[0] // 626, 626, 155)
noisy_phase = noisy_phase.reshape(noisy_phase.shape[0] // 129, 129, 626)
#reconstructed=cleanOutput
print(reconstructed.shape)

for k in range(reconstructed.shape[0]):
    suma = []
    for i in range(reconstructed.shape[1]):
        for j in range(129):
            reconstructed[k][i][j] = math.sqrt(math.exp(
                reconstructed[k][i][j]))
        suma.append(reconstructed[k][i])
    suma = np.array(suma)
    suma = suma.T
    the_real_STFT = suma[:-26:]
    print('the_rere', the_real_STFT.shape, the_real_STFT)
    the_rec_stft = the_real_STFT * noisy_phase[k]

    the_rec_signal = librosa.istft(the_rec_stft, hop_length=128)
    # all_sounds[k]=the_rec_signal
    scipy.io.wavfile.write('recSignal_{}.wav'.format(k), 8000, the_rec_signal)
Example #37

    # Tail of the keras style-transfer Evaluator class; the enclosing class
    # definition was lost in extraction, so the method is restored here.
    def grads(self, x):
        assert self.loss_value is not None
        grad_values = np.copy(self.grad_values)
        self.loss_value = None
        self.grad_values = None
        return grad_values


evaluator = Evaluator()

# run scipy-based optimization (L-BFGS) over the pixels of the generated image
# so as to minimize the neural style loss
x = base_array

for i in range(iterations):
    print('Start of iteration', i)
    print('sr:')
    print(base_sr)
    start_time = time.time()
    x, min_val, info = fmin_l_bfgs_b(evaluator.loss,
                                     x.flatten(),
                                     fprime=evaluator.grads,
                                     maxfun=20)
    print('Current loss value:', min_val)
    # save current generated image
    img = deprocess_image(x.copy(), base_phases, img_nrows, img_ncols)
    out = librosa.istft(img)
    fname = result_prefix + '_at_iteration_%d.wav' % i
    pysndfile.sndio.write(fname, out, rate=base_sr, format='wav', enc='pcm16')
    end_time = time.time()
    print('Image saved as', fname)
    print('Iteration %d completed in %ds' % (i, end_time - start_time))
Example #38
def _istft(self, y):
    return librosa.istft(y,
                         hop_length=self.hop_length,
                         win_length=self.win_length)
Example #39
def to_wav(mag, phase, len_hop=ModelConfig.L_HOP):
    stft_matrix = get_stft_matrix(mag, phase)
    return np.array(
        list(map(lambda s: librosa.istft(s, hop_length=len_hop), stft_matrix)))
Example #40
def main():
    # ===== Arguments ===== #
    parser = argparse.ArgumentParser()
    parser.add_argument("--gpu",
                        "-g",
                        type=int,
                        default=-1,
                        help="specify GPU")
    parser.add_argument("--model_path", "-m", type=str)
    parser.add_argument("--units",
                        "-u",
                        type=int,
                        default=5000,
                        help="# of FC units")
    parser.add_argument("--data_visual", type=str, default=DATA_DIR_VISUAL)
    parser.add_argument("--data_speech", type=str, default=DATA_DIR_SPEC)
    parser.add_argument("--result_dir", type=str, default="RESULT/separation/")
    args = parser.parse_args()

    # ===== GPU or CPU ===== #
    if args.gpu >= 0:
        xp = cuda.cupy
        cuda.get_device(args.gpu).use()
    else:
        xp = np

    # ===== Load model ===== #
    print("loading model...")
    model = Audio_Visual_Net(spec_len=SPEC_LEN,
                             gpu=args.gpu,
                             num_fusion_units=args.units)
    if args.gpu >= 0:
        model.to_gpu(args.gpu)
    if args.model_path.find("snapshot") > -1:
        chainer.serializers.load_npz(args.model_path,
                                     model,
                                     path="updater/model:main/")
    else:
        chainer.serializers.load_npz(args.model_path, model)

    # ===== Load test data ===== #
    print("loading test data...")
    spec_input = sorted(glob.glob(os.path.join(args.data_speech, "*.npz")))
    vis_input = sorted(glob.glob(os.path.join(args.data_visual, "*")))
    #assert len(spec_input)==len(vis_input), "# of files are different between faces and audios."
    l_input = len(spec_input)
    test = []
    spec_input = [
        os.path.join(args.data_speech, "{}.npz".format(i)) for i in range(5)
    ]
    vis_input = [
        os.path.join(args.data_visual, "{}".format(i)) for i in range(5)
    ]

    for i in range(5):
        _num = int(os.path.basename(spec_input[i]).split(".")[0])
        _spec_input_mix, _phase = LoadAudio(
            fname=os.path.join(DATA_DIR_MIX, "{}.wav".format(_num)))
        _mag = _spec_input_mix.T[np.newaxis, :, :]
        _phase = _phase.T[np.newaxis, :, :]
        _vis_input1 = xp.array(
            pd.read_csv(os.path.join(vis_input[0], "speech1.csv"),
                        header=None)).astype(xp.float32) / 255.
        _vis_input2 = xp.array(
            pd.read_csv(os.path.join(vis_input[0], "speech2.csv"),
                        header=None)).astype(xp.float32) / 255.
        _vis_input1 = _vis_input1.T[:, :, np.newaxis]
        _vis_input2 = _vis_input2.T[:, :, np.newaxis]
        test.append((_mag, _vis_input1, _vis_input2, _phase))

    # ===== Separate mixed speeches ===== #
    print("start saparating...")
    if not os.path.exists(args.result_dir):
        os.makedirs(args.result_dir)
    with chainer.using_config("train", False):
        for i in range(l_input):
            print("{}/{}".format(i + 1, l_input))
            loop = int(math.ceil(test[i][0].shape[1] // SPEC_LEN))
            speech1 = []
            speech2 = []
            phase = xp.array(test[i][3][0, :, :].T)
            for l in range(loop):
                # we have to reshape test data because we must add batch size dimension
                _spec = test[i][0][np.newaxis, :,
                                   (SPEC_LEN * l):(SPEC_LEN * (l + 1)), :]
                _face1 = test[i][1][np.newaxis, :,
                                    (FACE_LEN * l):(FACE_LEN * (l + 1)), :]
                _face2 = test[i][2][np.newaxis, :,
                                    (FACE_LEN * l):(FACE_LEN * (l + 1)), :]
                y = model.separateSpectrogram(spec=_spec,
                                              face1=_face1,
                                              face2=_face2)
                y = y.data
                mask1 = xp.array(y[0, :, :257].T)
                mask2 = xp.array(y[0, :, 257:].T)
                _phase = phase[:, (SPEC_LEN * l):(SPEC_LEN * (l + 1))]
                d1 = chainer.cuda.to_cpu(mask1 * _phase)
                d2 = chainer.cuda.to_cpu(mask2 * _phase)
                speech1.append(
                    istft(d1, hop_length=HOP_LEN, win_length=FFT_SIZE))
                speech2.append(
                    istft(d2, hop_length=HOP_LEN, win_length=FFT_SIZE))
            speech1 = np.concatenate(speech1)
            speech2 = np.concatenate(speech2)
            write_wav(path="{}/{}-speech1.wav".format(args.result_dir, i),
                      y=speech1,
                      sr=SR,
                      norm=True)
            write_wav(path="{}/{}-speech2.wav".format(args.result_dir, i),
                      y=speech2,
                      sr=SR,
                      norm=True)

    print("done!!")
Example #41
plt.subplot(3, 1, 2)
background = librosa.amplitude_to_db(S_background, ref=np.max)
librosa.display.specshow(background, y_axis='log', sr=sr)
plt.title('Background')
plt.colorbar()

plt.subplot(3, 1, 3)
foreground = librosa.amplitude_to_db(S_foreground, ref=np.max)
librosa.display.specshow(foreground, y_axis='log', x_axis='time', sr=sr)
plt.title('Foreground')
plt.colorbar()

plt.tight_layout()
plt.show()

full_audio = librosa.istft(S_full)
foreground_audio = librosa.istft(S_foreground)
background_audio = librosa.istft(S_background)

####################################################
# Print out some metadata of the original audio and the 3 derived streams
print("sr: {}".format(sr))
print("orig({}) max {} power {}: {}".format(len(source_audio),
                                            audioop.max(source_audio, 2),
                                            audioop.rms(source_audio, 2),
                                            source_audio))
print("full({}) max {} power {}: {}".format(len(full_audio),
                                            audioop.max(background_audio, 2),
                                            audioop.rms(full_audio, 2),
                                            full_audio))
print("foreground({}) max {} power {}: {}".format(
Example #42
# (first line of this call restored from the matching mask_v call below)
mask_i = librosa.util.softmask(S_filter,
                               margin_i * (S_full - S_filter),
                               power=power)

mask_v = librosa.util.softmask(S_full - S_filter,
                               margin_v * S_filter,
                               power=power)

# Once we have the masks, simply multiply them with the input spectrum
# to separate the components

S_foreground = mask_v * S_full
S_background = mask_i * S_full

# get audio from the foreground audio
d_foreground = S_foreground * phase
y_hat = librosa.istft(d_foreground)
librosa.output.write_wav(dest_vocal_filename, y_hat, sr=sr)

# get audio from the background audio
d_background = S_background * phase
y_hat = librosa.istft(d_background)
librosa.output.write_wav(dest_bg_filename, y_hat, sr=sr)

# sphinx_gallery_thumbnail_number = 2

plt.figure(figsize=(12, 8))
plt.subplot(3, 1, 1)
librosa.display.specshow(librosa.amplitude_to_db(S_full[:, idx], ref=np.max),
                         y_axis='log',
                         sr=sr)
plt.title('Full spectrum')
Example #43
def core(input_path,
         output_path,
         output_sr=48000,
         inter_sr=1,
         test_mode=False,
         opti_mode=True,
         dyn_protect=True,
         harmonic_hpfc=6000,
         harmonic_sft=16000,
         harmonic_gain=1.2,
         percussive_hpfc=6000,
         percussive_stf=16000,
         percussive_gain=2.5,
         update=None,
         msgbox=None):
    def hpd_n_shift(data, lpf, sft, gain):
        sr = output_sr * inter_sr
        # High-pass filter
        b, a = signal.butter(3, lpf / (sr / 2), 'high')
        data = librosa.stft(signal.filtfilt(b, a, librosa.istft(data)))
        # Copy the spectrum upward, frame by frame
        for i in range(data.shape[1]):
            update.emit(i / data.shape[1])
            shift = sft
            shift_point = round(shift / (sr / data.shape[0]))
            # Modulate: shift bins upward
            for p in reversed(range(len(data[:, i]))):
                data[:, i][p] = data[:, i][p - shift_point]
        # High-pass filter again
        data = librosa.stft(signal.filtfilt(b, a, librosa.istft(data)))
        data *= gain
        return data

    # Dynamic-range protection notice
    if dyn_protect:
        msgbox.emit("提示", "动态范围保护特性已启用\n", 1)  # "Notice: dynamic-range protection is enabled"

    # Load the audio
    y, sr = librosa.load(input_path, mono=False, sr=None)
    if test_mode:
        y, sr = librosa.load(input_path,
                             mono=False,
                             sr=None,
                             offset=round(len(y[0]) / sr / 2),
                             duration=5)
    y = resampy.resample(y, sr, output_sr * inter_sr, filter='kaiser_fast')
    # Compute the STFT of each channel
    stft_list = [librosa.stft(chan) for chan in y]

    # Harmonic-enhancement mode
    for chan in stft_list:
        D_harmonic, D_percussive = librosa.decompose.hpss(chan, margin=4)
        D_harmonic = hpd_n_shift(D_harmonic, harmonic_hpfc, harmonic_sft,
                                 harmonic_gain)
        D_percussive = hpd_n_shift(D_percussive, percussive_hpfc,
                                   percussive_stf, percussive_gain)

        if not dyn_protect:
            chan += D_harmonic + D_percussive
        else:
            # Dynamic-range protection
            adp = D_harmonic + D_percussive
            adp_power = np.mean(np.abs(adp))
            src_power = np.mean(np.abs(chan))
            src_f = 1 - (adp_power / src_power)
            adp += src_f * chan
            chan *= 0
            chan += adp

    # Merge channels and write the output
    istft_list = [librosa.istft(chan) for chan in stft_list]
    final_data = resampy.resample(np.array(istft_list),
                                  output_sr * inter_sr,
                                  output_sr,
                                  filter='kaiser_fast')
    try:
        librosa.output.write_wav(output_path, final_data, output_sr)
    except PermissionError:
        msgbox.emit("警告",
                    "无法写入文件,请检查目标路径写入权限" \
                    "以及文件是否已被其他程序开启。",
                    0)
    # 参数优化
    if not opti_mode:
        return
    optimizer(input_path, output_path, percussive_hpfc, percussive_stf,
              percussive_gain, msgbox)
Example #44
noisy_speech_time, sr = librosa.load(NOISY_PATH, sr=None)
noisy_speech = 10 * np.log10(
    np.abs(
        librosa.stft(
            noisy_speech_time, n_fft=512, hop_length=160, win_length=320)))
noisy_speech = normalize(noisy_speech, test_min, test_max)
noisy_phase = librosa.stft(noisy_speech_time,
                           n_fft=512,
                           hop_length=160,
                           win_length=320)

print('Saving predicted....')
predicted = model.predict(noisy_speech.T)
librosa.output.write_wav(
    '/N/u/anakuzne/Carbonate/dl_for_speech/HW3_II/py/models/normIRM_predicted.wav',
    librosa.istft(predicted.T * noisy_speech + np.angle(noisy_phase)),
    sr,
    norm=False)

print('Saving figure...')

fig1 = plt.figure(figsize=(10, 5))
plt.imshow(np.abs(predicted),
           aspect="auto",
           origin="lower",
           extent=[0, 311, 0, 8000])
plt.xlabel("No. of samples")
plt.ylabel("Frequency")
plt.title("normIRM")
plt.savefig(
    '/N/u/anakuzne/Carbonate/dl_for_speech/HW3_II/py/models/norm_irm_predicted.png')
Example #45
import scipy.io.wavfile as wave
import numpy as np
import librosa

(rate, x) = wave.read('spring_16k.wav')
print(np.sum(x))
x = x.T
print(np.sum(librosa.istft(librosa.stft(x), dtype=np.int32)))
Example #46
def to_wav_from_spec(stft_matrix, len_hop=ModelConfig.L_HOP):
    return np.array(
        list(map(lambda s: librosa.istft(s, hop_length=len_hop), stft_matrix)))
Example #47

data = np.load('te_data_and_lable.npz')
test_data = data['a']
test_data = test_data / np.max(np.abs(test_data))
phase_of_test_data = np.angle(test_data)
#test_lable=data['d']
#phase_of_test_data=np.angle(test_lable)

model = load_model('my_modle.h5')
model.load_weights('./best_weights.hdf5')
estimated_magnitude = model.predict(np.abs(test_data))
#estimated_magnitude= np.abs(test_lable)

#phase=np.angle(test_data)
pre = estimated_magnitude * np.exp(1j * phase_of_test_data)
estimate = np.reshape(pre, (pre.shape[0] * pre.shape[1], pre.shape[2]))
#estimate1=np.reshape(pre,(pre.shape[2],pre.shape[0]*pre.shape[1]))

estimate = librosa.istft(estimate.T, hop_length=512)
#sd.play(estimate)
te_lable = te_lable[:len(estimate)]
te_data = te_data[:len(estimate)]
groundtruth = np.zeros((2, len(estimate)))
groundtruth[0, :] = te_lable
groundtruth[1, :] = te_data - te_lable
estim = np.zeros((2, len(estimate)))
estim[0, :] = estimate
estim[1, :] = te_data - estimate
(sdr, sir, sar, perm) = separation.bss_eval_sources(groundtruth, estim)
print("sdr={},sar={}".format(sdr, sar))
librosa.output.write_wav('estimate_test_data.wav', estimate, 44100)
Example #48
import librosa.display
import scipy
from scipy import signal
import matplotlib.pyplot as plt
import wave
import struct
import os


def show_spect(spect, fs, file):
    librosa.display.specshow(spect, sr=fs)
    plt.savefig(file.split('.')[0] + '.png')


file = "test.npy"

spect = np.load(file)
spect = np.reshape(spect, (257, 301))
show_spect(spect, 16000, file)

# Implementation of the Griffin-Lim method
A = librosa.db_to_amplitude(spect)
theta = 0
X = A * np.cos(theta) + A * np.sin(theta) * 1j

for i in range(100):
    x = librosa.istft(X, hop_length=160, win_length=400)
    X = librosa.stft(x, n_fft=512, hop_length=160, win_length=400)
    X = A * X / np.abs(X)

librosa.output.write_wav(file.split('.')[0] + '-reconstruct.wav', x, 16000)
Example #49

File: audio.py  Project: senthilk8919/TTS
def _istft(self, y):
    _, hop_length, win_length = self._stft_parameters()
    return librosa.istft(y, hop_length=hop_length, win_length=win_length)
Example #50
def _istft(y, n_fft, hop_length, win_length, use_tensorflow=False):
    if use_tensorflow:
        # return librosa.istft(y, hop_length, win_length)
        return _istft_tensorflow(y.T, n_fft, hop_length, win_length)
    else:
        return librosa.istft(y, hop_length, win_length)
Example #51
def invert_spectrogram(spectrogram):
    '''
    spectrogram: [f, t]
    '''
    return librosa.istft(spectrogram, 160, win_length=320, window="hamming")
Example #52
def _istft_librosa(y, hop_length, win_length):
    return librosa.istft(y, hop_length, win_length)
Example #53
def _istft(y, hparams):
    return librosa.istft(y, hop_length=get_hop_size(hparams), win_length=hparams.win_size)
Example #54
def hp_sep(y):
    D_h, D_p = librosa.decompose.hpss(librosa.stft(y))
    return librosa.istft(D_h), librosa.istft(D_p)
Example #55
def reconstruct_wave(magnitude, phase):
    reconstr = librosa.istft(magnitude * phase)
    return reconstr
Example #56
def phase_restore(mag, random_phases, N):
    p = np.exp(1j * (random_phases))
    for i in range(N):
        _, p = librosa.magphase(
            librosa.stft(librosa.istft(mag * p), n_fft=config.N_FFT))
    return p
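
A usage sketch, assuming the project's config.N_FFT equals the 2048 used for analysis here (the tone and iteration count are illustrative):

import numpy as np
import librosa

y = np.sin(2 * np.pi * 440.0 * np.arange(22050) / 22050.0)
mag = np.abs(librosa.stft(y, n_fft=2048))
random_phases = 2 * np.pi * np.random.rand(*mag.shape)
p = phase_restore(mag, random_phases, N=30)
y_rec = librosa.istft(mag * p)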
Example #57
def invert_spectrogram(spectrogram):
    '''Applies the inverse STFT.
    Args:
      spectrogram: [1+n_fft//2, t]
    '''
    return librosa.istft(spectrogram, hp.hop_length, win_length=hp.win_length, window="hann")
Example #58
def _istft(y, sr):
    _, hop_length, win_length = _stft_parameters(sr)
    return librosa.istft(y, hop_length=hop_length, win_length=win_length)
Example #59
def istft_transform_clean(teX, IBM):
    clean = librosa.istft(teX * IBM, hop_length=512)
    return clean
Example #60
def get_istft(X, time_shape, hop_length=256, **kwargs):
    return librosa.istft(X, hop_length=hop_length, length=time_shape, **kwargs)
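
A usage sketch: passing length=time_shape makes librosa trim (or zero-pad) the inverse transform to the original sample count (the noise signal is illustrative):

import numpy as np
import librosa

y = np.random.randn(10000)
X = librosa.stft(y, hop_length=256)
y_rec = get_istft(X, time_shape=len(y), hop_length=256)
assert len(y_rec) == len(y)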