def problem4():
    """Convolve ``tada.wav`` with ten seconds of stereo white noise.

    Reads ``tada.wav`` (expected to be stereo), zero-pads it to the length of
    the noise, multiplies the two spectra (i.e. circular convolution) and
    writes the real part, rescaled to the int16 range, to
    ``my_tada_conv.wav``.
    """
    # The original relied on the ``sp.*`` aliases of NumPy functions
    # (``sp.fft``, ``sp.int16``, ``sp.zeros`` ...).  ``scipy.fft`` is a module
    # in current SciPy and the other aliases are deprecated/removed, so the
    # NumPy functions are used directly.
    import numpy as np

    # read in tada.wav
    rate, tada = wavfile.read('tada.wav')

    # we create stereo white noise that lasts 10 seconds
    n_samples = rate * 10
    white = np.zeros((n_samples, 2))
    white[:, 0] = np.int16(np.random.randint(-32767, 32767, n_samples))
    white[:, 1] = np.int16(np.random.randint(-32767, 32767, n_samples))

    # pad tada signal with zeros so both signals have the same length
    padded_tada = np.zeros_like(white)
    padded_tada[:len(tada)] = tada

    # fourier transforms along the time axis
    ftada = np.fft.fft(padded_tada, axis=0)
    fwhite = np.fft.fft(white, axis=0)

    # inverse transform of the spectral product == convolution in time
    out = np.real(np.fft.ifft(ftada * fwhite, axis=0))

    # rescale to the full int16 range and write the file
    scaled = np.int16(out / np.absolute(out).max() * 32767)
    wavfile.write('my_tada_conv.wav', rate, scaled)
def generate_audio(ifilename, ofilename, buffer_size, hop, oracle, seq_len, p, k):
    """Resynthesise a WAV file by overlap-adding frames chosen by ``generate``.

    The input is cut into frames of up to ``buffer_size`` samples taken every
    ``hop`` samples. ``generate(oracle, seq_len, p, k)`` supplies the index
    sequence used to select from the frame matrix. The selection is windowed
    with ``make_win`` and overlap-added into a two-column buffer, which is
    then divided by the accumulated window weights (except for the first and
    last ``hop`` samples).

    Returns:
        (x, wsum): the int32 output buffer that was written to ``ofilename``
        and the accumulated window weights.
    """
    fs, source = wavfile.read(ifilename)

    # Slice the input into (possibly overlapping) frames.
    frames = np.array([
        np.array(source[start:start + buffer_size])
        for start in range(0, len(source), hop)
    ])

    s, kend, ktrace = generate(oracle, seq_len, p, k)
    xnewmat = frames[:, s]

    framelen = len(xnewmat[0])
    nframes = len(xnewmat)
    total_len = (nframes - 1) * hop + framelen

    wsum = np.zeros((total_len, 2))
    win = make_win(framelen)
    x = np.zeros((total_len, 2))
    win_pos = range(0, len(x), hop)

    # Overlap-add section: accumulate windowed frames and window weights.
    for i, frame in enumerate(xnewmat):
        win = make_win(len(frame))
        span = slice(win_pos[i], win_pos[i] + len(frame))
        x[span] = x[span] + frame * win
        wsum[span] = wsum[span] + win

    # Normalise by the summed windows, leaving the outer hops untouched.
    x[hop:-hop] = x[hop:-hop] / wsum[hop:-hop]

    x = np.array(x, dtype=np.int32)
    wavfile.write(ofilename, fs, x)
    return x, wsum
def play2(filename):
    """Write the original clip and a 4x sped-up copy to WAV files.

    Reads ``filename``, runs the samples through ``speedx(data, 4)`` and
    writes ``test.wav`` (unchanged samples) and ``spd2.wav`` (sped-up
    samples).

    Both files are written with the sample rate read from the input. The
    previous code passed ``len(data)`` / ``len(spd2)`` as the rate, which
    made every output exactly one second long regardless of content and
    cancelled the effect of the speed change.
    """
    fs, data = wavfile.read(filename)
    spd2 = speedx(data, 4)
    wavfile.write('test.wav', fs, data)
    wavfile.write('spd2.wav', fs, spd2)
def generate_bit(name):
    """Generate one shaped bit waveform and emit it as WAV, C source and header.

    Two opposite unit impulses are convolved with the RDS pulse-shaping
    filter. A fixed 576-sample window of the result is written to
    ``waveform_<name>.wav`` (big-endian int16, peak 20000), appended to
    ``outc`` as a C float array, and declared in ``outh``.
    """
    bit_len = 96

    # Impulse pair: +1 at one bit length, -1 at two bit lengths.
    impulses = numpy.zeros(3 * bit_len)
    impulses[bit_len] = 1
    impulses[2 * bit_len] = -1

    # Apply the data-shaping filter
    shaping = rds.pulse_shaping_filter(96 * 8, 228000)
    shaped = numpy.convolve(impulses, shaping)
    out = shaped[528 - 288:528 + 288]

    peak = max(abs(out))
    iout = (out * 20000.0 / peak).astype(numpy.dtype(">i2"))
    wavfile.write(u"waveform_{}.wav".format(name), sample_rate, iout)

    # note: need to limit the amplitude so as not to saturate when the biphase
    # waveforms are summed (hence the division by 2.5)
    values = u", ".join(unicode(v) for v in out / 2.5)
    outc.write(
        u"float waveform_{name}[] = {{{values}}};\n\n".format(name=name, values=values)
    )

    outh.write(u"extern float waveform_{name}[{size}];\n".format(name=name, size=len(out)))
def save_wavfile(signal, file_prefix, rate=16000):
    """Write every row of a 2-D array to its own WAV file.

    Args:
        signal: array indexed as ``signal[s]``; ``signal.shape[0]`` rows are
            written, one file per row.
        file_prefix: path prefix; row ``s`` goes to ``<file_prefix>_<s>.wav``.
        rate: sample rate written to each file.
    """
    # ``range`` instead of the Python-2-only ``xrange`` so the helper runs on
    # both Python 2 and Python 3.
    for s in range(signal.shape[0]):
        file_path = file_prefix + '_{}.wav'.format(s)
        wav.write(file_path, rate, signal[s][:])
def convert_gen_to_out(data_dir):
    """Convert every ``gen/*.npy`` spectrum dump into a 44.1 kHz WAV in ``out/``.

    Each ``.npy`` file is iterated block by block. The first half of a block
    is taken as the real part and the second half as the imaginary part of a
    spectrum, which is inverse-FFT'd. The time-domain blocks are
    concatenated, scaled by 32767 and written as int16.

    Args:
        data_dir: directory containing (or that will contain) the ``gen`` and
            ``out`` sub-directories.
    """
    gen_dir = os.path.join(data_dir, 'gen')
    out_dir = os.path.join(data_dir, 'out')
    datatools.ensure_dir_exists(gen_dir)
    datatools.ensure_dir_exists(out_dir)

    write_flush('-- Converting gen to out...')
    for npy_data_file in glob.glob(os.path.join(gen_dir, '*.npy')):
        # basename/splitext instead of split('/') and str.replace: works with
        # any path separator and only touches the real extension.
        stem = os.path.splitext(os.path.basename(npy_data_file))[0]
        wav_filename = os.path.join(out_dir, stem + '.wav')

        blocks = []
        for fft_block in np.load(npy_data_file):
            half = fft_block.shape[0] // 2
            spectrum = fft_block[:half] + 1.0j * fft_block[half:]
            # Keep only the real part explicitly; the old complex -> int16
            # cast dropped the imaginary part implicitly (ComplexWarning).
            blocks.append(np.fft.ifft(spectrum).real)

        song_blocks = (np.concatenate(blocks) * 32767.0).astype('int16')
        wavfile.write(wav_filename, 44100, song_blocks)
    write_flush('finished. \n')
def write_wav(self):
    """Synthesizes the analysis to a WAV file in the synthesis directory.

    One sinusoid is generated per entry of ``self.bins`` (``bin[0]`` is used
    as the frequency, ``bin[1]`` as the amplitude). The sinusoids are summed
    sample by sample, passed through ``scale(samples, -1.0, 1.0)`` and
    written to ``../synthesis/<wav_name>_resynth.wav``.
    """
    print("Performing Resynthesis...")
    progress = ProgressBar(
        widgets=[Percentage(), Bar()], maxval=self.num_samples
    ).start()

    # The amplitude is applied while each sinusoid is generated. The old
    # code multiplied a loop variable (``for s in data: s = s * amp``), which
    # never changed the list, so every partial had amplitude 1.
    # float() guards against integer division when frate is an int on
    # Python 2.
    frate = float(self.frate)
    sines = [
        [
            partial[1] * sin(2 * pi * partial[0] * (x / frate))
            for x in range(self.num_samples)
        ]
        for partial in self.bins
    ]

    # Sum the partials sample by sample, reporting progress as we go.
    samples = []
    for i, column in enumerate(zip(*sines)):
        samples.append(sum(column))
        progress.update(i + 1)

    samples = scale(samples, -1.0, 1.0)
    wavfile.write(
        "../synthesis/" + self.wav_name + "_resynth.wav",
        self.sample_rate,
        array(samples)
    )
    progress.finish()
def _check_roundtrip(realfile, rate, dtype, channels): if realfile: fd, tmpfile = tempfile.mkstemp(suffix='.wav') os.close(fd) else: tmpfile = BytesIO() try: data = np.random.rand(100, channels) if channels == 1: data = data[:,0] if dtype.kind == 'f': # The range of the float type should be in [-1, 1] data = data.astype(dtype) else: data = (data*128).astype(dtype) wavfile.write(tmpfile, rate, data) for mmap in [False, True]: rate2, data2 = wavfile.read(tmpfile, mmap=mmap) assert_equal(rate, rate2) assert_(data2.dtype.byteorder in ('<', '=', '|'), msg=data2.dtype) assert_array_equal(data, data2) del data2 finally: if realfile: os.unlink(tmpfile)
def generate(freq, length, amp, samprate, frames, comptype, compname):
    """Generate a sine wave and write it to a user-chosen ``.wav`` file.

    The user is prompted for a filename until one is given that ends in
    ``.wav`` and does not already exist; the tone is then written there.

    Args:
        freq: frequency of the sine wave.
        length: upper bound of the time axis (``linspace(0, length, frames)``).
        amp: peak amplitude before the int16 conversion.
        samprate: sample rate written to the WAV header.
        frames: number of samples to generate.
        comptype, compname: unused; kept for interface compatibility.
    """
    # ``frames`` values equally spaced from 0 to ``length``.
    values = linspace(0, length, frames)
    # sin(2*pi*f*t) scaled by the amplitude, stored as 16-bit integers.
    wave = sin(2*pi*freq*values)*amp
    tone = wave.astype(int16)

    # raw_input on Python 2, input on Python 3.
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    # The old suffix check was ``try: filename[-4:] == '.wav'``, which never
    # raises, so a bad suffix was silently accepted; the recursive retry also
    # fell through and carried on with the rejected name. Loop instead.
    while True:
        filename = read_line("Enter filename (needs .wav suffix): ")
        if not filename.endswith('.wav'):
            print('ERROR: Does not have proper .wav suffix')
            continue
        if os.path.exists(filename):
            print("File already exists.")
            continue
        break

    write(filename, samprate, tone)
def datawrite(filename, rate, data):
    """Write ``data`` to ``filename`` as a WAV file, exiting on I/O failure.

    Args:
        filename: destination path.
        rate: sample rate.
        data: sample array passed straight to ``write``.

    On ``IOError`` a message is printed and the process exits with status 1.
    A bare ``sys.exit()`` reports success (status 0) to the calling shell.
    """
    try:
        write(filename, rate, data)
    except IOError:
        print("IOError:Wrong file or file path")
        sys.exit(1)
def writeWaveFile(data, fileName, SRate=44100.0, normalize=False,
                  removeDcWhenNormalizing=True):
    """
    write an array of floats to a 16 bit wave file
    @param data a list of lists or numpy array containing the frame data.
        A flat sequence is written as mono; a sequence of sequences is
        treated as one entry per channel.
    @param fileName the output file name
    @param SRate the sampling frequency [Hz]; converted to int for the header
    @param normalize if the parameter normalize is set to True, the signal
        will be normalized to the maximally possible value (i.e. 1). if no
        normalization is performed, and if the input signal has a maximum
        absolute amplitude greater than 1 (i.e. if the output would be
        clipped), the function throws an error.
    @param removeDcWhenNormalizing if we're normalizing, this determines
        whether we should remove the DC offset before doing so.
    @return nothing
    """
    if not isinstance(data, (list, numpy.ndarray)):
        raise Exception("expected a list data type")

    if isinstance(data[0], (list, numpy.ndarray)):
        # multi-channel: data[ch] holds the samples of channel ch
        numChannels = len(data)
        n = len(data[0])
        dataTmp = numpy.zeros((n, numChannels))
        for chIdx in range(numChannels):
            channel = data[chIdx]
            if not isinstance(channel, (list, numpy.ndarray)):
                raise Exception("channel data is not a list or a numpy array")
            dataTmp[0:, chIdx] = numpy.asarray(channel, dtype=float)
    else:
        # this is a mono file
        # force creating a float copy: avoids scaling the original data and
        # lets the in-place float operations below work on integer input
        dataTmp = numpy.array(data, dtype=float)

    if normalize:
        if removeDcWhenNormalizing:
            dataTmp -= dataTmp.mean()
        absMax = numpy.abs(dataTmp).max()
        # an all-zero signal stays all-zero instead of becoming NaN
        if absMax > 0:
            dataTmp /= absMax * 1.000001
    elif numpy.abs(dataTmp).max() > 1:
        # documented contract: refuse to write a signal that would clip
        raise Exception(
            "signal exceeds the range [-1, 1] and would be clipped; "
            "pass normalize=True")

    # save
    dataTmp *= float(2 ** 15 - 1)
    # the WAV header stores the rate as an integer, so the float default
    # must be converted before it reaches the writer
    sciWav.write(fileName, int(SRate), numpy.asarray(dataTmp, dtype=numpy.int16))
    del dataTmp
    gc.collect()
def main():
    """Apply a peaking filter to the first channel of ``Music.wav``.

    The first channel is transformed with ``zTransform``, filtered with the
    coefficients returned by ``peaking(100, 10, 44100, 15)``, transformed
    back with ``zTransformInverse`` and written to ``Test.wav`` as int16 at
    the input file's sample rate.
    """
    sample_rate, stereo = wavfile.read('Music.wav')  # load the data
    first_channel = stereo.T[0]  # one stream of the stereo signal

    spectrum = zTransform(first_channel, 44100)

    low_b, low_a = peaking(100, 10, 44100, 15)
    # Second peaking filter: computed, but not applied to the signal.
    b1, a1 = peaking(1000, 15, 44100, 15)

    filtered = lfilter(low_b, low_a, spectrum)
    restored = zTransformInverse(filtered, 44100)
    wavfile.write("Test.wav", sample_rate, restored.real.astype(np.int16))
def test():
    """Round-trip a WAV file through ``rfft``/``irfft`` and write the result.

    The input samples and the reconstructed samples are printed, and the
    reconstruction is written to ``TestFiltered.wav`` with the same sample
    rate, length and dtype as the input.
    """
    rate, data = wavfile.read('/Users/hehehehehe/Desktop/workspace/final/data/musicvideo/musicvideo.wav')
    filtereddata = numpy.fft.rfft(data, axis=0)
    print(data)
    # Pass n explicitly: without it irfft returns an even number of samples
    # and an odd-length input comes back one sample short.
    filteredwrite = numpy.fft.irfft(filtereddata, n=data.shape[0], axis=0)
    print(filteredwrite)
    # irfft returns float64 at the input's numeric scale. Writing that as-is
    # produces a 64-bit float WAV far outside [-1, 1], so convert back to
    # the input sample type (rounding and clipping for integer formats).
    if numpy.issubdtype(data.dtype, numpy.integer):
        limits = numpy.iinfo(data.dtype)
        filteredwrite = numpy.clip(numpy.rint(filteredwrite), limits.min, limits.max)
    wavfile.write('TestFiltered.wav', rate, filteredwrite.astype(data.dtype))
def main():
    """Render a short tune to the WAV file named on the command line.

    The buffer starts with ``gen_single(0, 2)``, followed by one
    ``gen_single(f, 0.5)`` tone per note of the song, each weighted by
    ``log(f / 440)``. The result is peak-normalised and written as int16 at
    ``SAMPLE_RATE``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('output')
    args = parser.parse_args()

    data = []
    data.extend(gen_single(0, 2))

    # Semitone offsets of the scale degrees relative to 880 Hz.
    scale = np.array([-9, -7, -5, -4, -2, 0, 2, 3])
    freq = 880 * np.power(2, scale / 12.0)
    print([int(f) for f in freq])

    def gen_song():
        # Scale degrees (1-based) of the melody.
        degrees = [1, 5, 6, 5, 4, 3, 2, 1]
        song = [freq[d - 1] for d in degrees]
        print([int(f) for f in song])
        for f in song:
            data.extend(gen_single(f, 0.5) * np.log(f / 440))

    def gen_chord():
        degrees = [1, 3, 5]
        chord = [freq[d - 1] for d in degrees]
        print([int(f) for f in chord])
        val = gen_single(chord[0], 4)
        for f in chord[1:]:
            val += gen_single(f, 4)
        data.extend(val)

    gen_song()
    #gen_chord()

    # Peak-normalise, then scale to the int16 range.
    normalized = np.asarray(data) / np.max(np.abs(data))
    pcm = (normalized * 32767).astype('int16')
    wavfile.write(args.output, SAMPLE_RATE, pcm)
def main(argv):
    """Command-line entry point: swap magnitude and phase of two WAV files.

    Args:
        argv: ``[prog, WINDOW_WIDTH, FILE_1, FILE_2, OUTFILE_1, OUTFILE_2]``.

    Returns:
        0 on success, 1 (after printing the usage text) when the argument
        count is wrong.
    """
    if len(argv) != 6:
        print(
            'Usage: %s ' % argv[0]
            + '<WINDOW_WIDTH> <FILE_1> <FILE_2> <OUTFILE_1> <OUTFILE_2>'
            + (
                '\n\nSwap magnitude and phase of WAV files FILE_1 and FILE_2.'
                '\n\nWINDOW_WIDTH: STFT frame length (integer # of samples)'
                '\nOUTFILE_1: phase of FILE_1, magnitude of FILE_2'
                '\nOUTFILE_2: phase of FILE_2, magnitude of FILE_1'))
        return 1

    window_size = int(argv[1])
    a_fname, b_fname, a_phase_b_mag_fname, b_phase_a_mag_fname = argv[2:6]

    a_rate, a = wavfile.read(a_fname)
    b_rate, b = wavfile.read(b_fname)
    # Both inputs must share sample rate and sample type.
    assert a_rate == b_rate
    assert a.dtype == b.dtype

    print('Window width: %d samples = %.3f ms'
          % (window_size, 1e3 * window_size / a_rate))

    a_phase_b_mag, b_phase_a_mag = swap_wav_magnitude(a, b, window_size)
    for out_name, mixed in ((a_phase_b_mag_fname, a_phase_b_mag),
                            (b_phase_a_mag_fname, b_phase_a_mag)):
        wavfile.write(out_name, a_rate, mixed)
    return 0
def energyDenoise(self, audio_file, scale, denoised_audio_file, energy_denoising_debug):
    """Zero every sample whose energy is below ``scale`` times the mean energy.

    Args:
        audio_file: input WAV path.
        scale: multiplier applied to the mean squared amplitude to obtain
            the threshold.
        denoised_audio_file: output WAV path.
        energy_denoising_debug: when true, plot the signal before and after
            denoising and show the figure.

    Returns:
        False if ``audio_file`` does not exist, True otherwise.
    """
    if not os.path.isfile(audio_file):
        return False

    samp_freq, signal = wavfile.read(audio_file)
    samples = signal.shape[0]

    if energy_denoising_debug:
        # Time axis in milliseconds, shared by both debug plots.
        timearray = arange(0, samples * 1.0, 1)
        timearray /= samp_freq
        timearray *= 1000.0
        subplot(3, 1, 1)
        plot(timearray, signal, color='k')

    # Vectorised replacement for the two per-sample Python loops: square in
    # floating point, then mask everything under the energy threshold.
    sq_signal = (signal * 1.0) ** 2
    mean_sq = sq_signal.mean()
    signal[sq_signal < scale * mean_sq] = 0

    if energy_denoising_debug:
        subplot(3, 1, 2)
        plot(timearray, signal, color='k')
        show()

    wavfile.write(denoised_audio_file, samp_freq, signal)
    return True
def wavwrite(data, fs, nbits, fname):
    """
    Write a numpy array as a WAV file.

    Parameters
    ----------
    data : array of floats
        The data to be written to the WAV file, nominally in [-1, 1].
    fs : int
        Sampling frequency of the sound.
    nbits : int
        Bit depth of the WAV file (currently only values of 16 and 32 are
        supported)
    fname : string
        Name of the WAV file.

    Notes
    -----
    Samples are clipped to the representable integer range before the
    conversion. Without the clip a sample of exactly +1.0 maps to 2**15
    (or 2**31), which does not fit and wraps around to the most negative
    value.

    Examples
    --------
    >>> wavwrite(data, 48000, 32, "file.wav")
    """
    if nbits not in [16, 32]:
        print("Sorry can only write 16 or 32 bits at the moment! Exiting")
        return

    if nbits == 16:
        full_scale, sample_type = 2.**15, int16
    else:
        full_scale, sample_type = 2.**31, int32

    # Scale in float64 so that the upper clip bound (full_scale - 1) is
    # exactly representable even for 32-bit output.
    scaled = data.astype(float) * full_scale
    data = scaled.clip(-full_scale, full_scale - 1).astype(sample_type)

    wavfile.write(fname, fs, data)
    return
def write_wave_file(signal, filename, sample_rate=None):
    """
    Write the signal to disk as a .wav file.

    Parameters
    ----------
    signal : numpy array or Signal
        The signal to be written to file.
    filename : str
        Name of the file.
    sample_rate : int, optional
        Sample rate of the signal [Hz].

    Returns
    -------
    filename : str
        Name of the file.

    Notes
    -----
    `sample_rate` can be 'None' if `signal` is a :class:`Signal` instance;
    the signal's own sample rate is used in that case. An explicitly given
    `sample_rate` always takes precedence. It must be given if `signal` is
    a plain ndarray.

    """
    from scipy.io import wavfile

    rate = sample_rate
    if rate is None and isinstance(signal, Signal):
        # fall back to the rate carried by the Signal instance
        rate = int(signal.sample_rate)

    wavfile.write(filename, rate=rate, data=signal)
    return filename
def volumeshift(self, filename, factor):
    """Increase or decrease the volume of a WAV file by ``factor`` decibels.

    The gain ``10 ** (factor / 20)`` is applied to the samples of
    ``filename``, the linear gain is printed, and the result is written to
    the file ``final_form`` with the input's sample rate and sample type.

    Multiplying integer PCM data by a float yields a float64 array at
    integer scale. Written unchanged, that becomes a 64-bit float WAV far
    outside [-1, 1], so integer input is rounded, clipped to its range and
    converted back to the original dtype.
    """
    factor = 10**(factor/20.0)
    fs, data = wavfile.read(filename)

    scaled = np.multiply(data, factor)
    if np.issubdtype(data.dtype, np.integer):
        limits = np.iinfo(data.dtype)
        scaled = np.clip(np.rint(scaled), limits.min, limits.max)
    scaled = scaled.astype(data.dtype)

    print(factor)
    wavfile.write("final_form", fs, scaled)
def generate(self, seed=None, minutes=0.5):
    """Generate ``minutes`` of audio with the model and write it to disk.

    The first ``time_dim * (sequence_dim - 1)`` samples are taken from
    ``seed`` (or from the data file when no seed is given). Every following
    chunk of ``time_dim`` samples is drawn with ``sample_from_gmm`` from the
    parameters that ``fprop`` returns for the preceding window. The result
    is written to ``EXP_PATH + "generation.wav"``.

    Args:
        seed: initial samples; fetched via ``self.get_datafile()`` when None.
        minutes: length of the generated audio in minutes.
    """
    print("Generating module")
    timestep = self.time_dim*(self.sequence_dim-1)
    # np.zeros needs an integer size; the product is a float whenever
    # ``minutes`` is (including the 0.5 default).
    samples = int(minutes*self.samplerate*60)
    song = np.zeros(samples, dtype=np.float32)

    if seed is None:
        datafile = self.get_datafile()
        seed = datafile.get_data(None, range(timestep))
        seed = seed[0].flatten()
    song[:timestep] = seed

    print('')
    for i in range(0, len(song)-self.time_dim-timestep, self.time_dim):
        sys.stdout.write('\rGenerating %d/%d samples'%(i, samples))
        sys.stdout.flush()
        params = self.fprop(song[i:i+timestep].reshape(
            (self.batch_dim, self.sequence_dim-1, self.time_dim)))
        try:
            song[i+timestep:i+timestep+self.time_dim] = self.sample_from_gmm(params)
        except ValueError:
            # NOTE(review): debugging hook left in place on purpose; it
            # blocks when run non-interactively.
            import ipdb ; ipdb.set_trace()

    write(EXP_PATH+"generation.wav", self.samplerate, song)
def wavwriteStereo(yLeft, yRight, fs, filename, inputSound):
    """
    Write a stereo sound file from 2 arrays with the channels sounds and the sampling rate
    yLeft: floating point array of one dimension, yRight: floating point array of one dimension,
    fs: sampling rate
    filename: name of file to create
    inputSound: original sound, used for auto-attenuation of the output sound

    Raises ValueError if the two channels differ in size. The inputs are
    not modified; scaling is done on copies.
    """
    if yLeft.size != yRight.size:
        raise ValueError('wavwriteStereo: Left and Right sound samples input arrays have different sizes')

    # Peak magnitudes, computed with NumPy instead of Python-level scans.
    yMaxMagnitude = max(np.max(np.abs(yLeft)), np.max(np.abs(yRight)))
    inputMaxMagnitude = np.max(np.abs(inputSound))

    # A silent output has nothing to attenuate; dividing by its zero peak
    # would turn every sample into NaN.
    if yMaxMagnitude == 0:
        attenuationRatio = 1.0
    else:
        attenuationRatio = inputMaxMagnitude / yMaxMagnitude

    def _to_int16(channel):
        # Scale a copy of one channel to the input's range, then to int16.
        scaled = np.array(channel, copy=True)
        scaled *= attenuationRatio   # scale output sound to the input sound range
        scaled *= INT16_FAC          # -1..1 floating point range to int16 range
        return np.int16(scaled)

    xStereo = np.transpose(np.array([_to_int16(yLeft), _to_int16(yRight)]))
    write(filename, fs, xStereo)
def stretch(self, filename, factor, window_size=2048, h=512):
    """Stretches the sound by a factor and writes it to ``pitchshift.wav``.

    A phase-vocoder style time stretch: overlapping Hann-windowed frames are
    re-phased and overlap-added at a hop scaled by ``1 / factor``.

    Args:
        filename: input WAV path (one-dimensional sample array expected).
        factor: stretch factor; values above 1 shorten the sound.
        window_size: analysis window length in samples.
        h: hop between the two frames whose phase difference is tracked.
    """
    fs, sound_array = wavfile.read(filename)

    phase = np.zeros(window_size)
    hanning_window = np.hanning(window_size)
    # Array sizes and slice bounds must be integers; ``factor`` is usually
    # a float.
    result = np.zeros(int(len(sound_array) / factor + window_size))

    for i in np.arange(0, len(sound_array) - (window_size + h), h * factor):
        i = int(i)

        # two potentially overlapping subarrays
        a1 = sound_array[i: i + window_size]
        a2 = sound_array[i + h: i + window_size + h]

        # resynchronize the second array on the first
        s1 = np.fft.fft(hanning_window * a1)
        s2 = np.fft.fft(hanning_window * a2)
        # Wrap to [0, 2*pi). ``% 2*np.pi`` parses as ``(x % 2) * pi``.
        phase = (phase + np.angle(s2 / s1)) % (2 * np.pi)
        a2_rephased = np.fft.ifft(np.abs(s2) * np.exp(1j * phase))

        # add to result; only the real part belongs in the real-valued
        # output buffer
        i2 = int(i / factor)
        result[i2: i2 + window_size] += (hanning_window * a2_rephased).real

    result = ((2 ** (16 - 4)) * result / result.max())  # normalize (16bit)
    result = result.astype('int16')

    # Write at the input's sample rate. Passing len(result) as the rate
    # forced every output to last exactly one second.
    wavfile.write('pitchshift.wav', fs, result)
def synt_all_method3(folder_in):
    """Synthesise a WAV file for every feature triple found in ``folder_in``.

    Files whose names contain ``_mc_mat``, ``_f0`` and ``_aper_mat`` are
    sorted and matched up positionally. Each triple is loaded and passed to
    ``synt_from_mcep_matrix_to_spec``, and the result is normalised with
    ``normalize_int16`` and written to a sibling directory named
    ``folder_in[:-15] + "_synt_mcep_mat"``.
    """
    folder_out = folder_in[:-15] + "_synt_mcep_mat"
    if not os.path.exists(folder_out):
        os.mkdir(folder_out)

    def _matching(tag):
        # Sorted names in folder_in that contain ``tag``.
        return sorted([item for item in os.listdir(folder_in) if tag in item])

    mcep_mats = _matching("_mc_mat")
    f0s = _matching("_f0")
    aper_mats = _matching("_aper_mat")

    frame_period_ms = float(hop_length) / samplingrate * 1000
    world = World(samplingrate, frame_period_ms)

    for mcep_mat_file, f0_file, aper_mat_file in zip(mcep_mats, f0s, aper_mats):
        print(mcep_mat_file)
        f0 = np.load(os.path.join(folder_in, f0_file))
        mcep = np.load(os.path.join(folder_in, mcep_mat_file))
        aper = np.load(os.path.join(folder_in, aper_mat_file))
        res = synt_from_mcep_matrix_to_spec(world, f0, mcep, aper)

        print("writing synth for {0}".format(mcep_mat_file))
        out_name = mcep_mat_file.replace("mc_mat.npy", "") + "synth.wav"
        wavfile.write(os.path.join(folder_out, out_name),
                      sample_rate,
                      normalize_int16(res))
def test_noise(noise_coeff=0.00):
    """Analyse ``test16k.wav``, add noise to its features and resynthesise.

    The file is decomposed into f0, spectrogram and residual, positive random
    noise scaled by ``noise_coeff`` is added to the spectrogram and residual,
    and two signals are synthesised and written: ``y_<noise>.wav`` (noisy
    spectrogram) and ``y_no_noise.wav`` (copy of the original spectrogram).
    Before/after plots are saved as PNG files.
    """
    file = 'test16k.wav'
    # NOTE(review): the result of this read is immediately overwritten by
    # readwav() on the next line.
    fs, x = wavfile.read(file)
    fs, nbit, x_length, x = readwav(file)
    period = 5.0
    opt = pyDioOption(40.0, 700, 2.0, period, 4)

    # Label used in titles and file names: the fractional digits for
    # coefficients below 1, otherwise the integer part.
    if noise_coeff < 1:
        noise_str = str(noise_coeff).split('.')[-1]
    else:
        noise_str = str(noise_coeff).split('.')[0]

    f0, time_axis = dio(x, fs, period, opt)
    f0_by_dio = copy.deepcopy(f0)
    f0 = stonemask(x, fs, period, time_axis, f0)
    # NOTE(review): the star() result is replaced by cheaptrick() right away.
    spectrogram = star(x, fs, period, time_axis, f0)
    spectrogram = cheaptrick(x, fs, period, time_axis, f0)
    residual = platinum(x, fs, period, time_axis, f0, spectrogram)

    # Keep an untouched copy: the spectrogram is modified in place below.
    old_spectrogram = np.copy(spectrogram)
    plt.matshow(old_spectrogram, cmap="gray")
    plt.title("Before %s noise" % noise_str)
    plt.savefig("before_%s.png" % noise_str)

    # Fixed seed so the added noise is reproducible.
    random_state = np.random.RandomState(1999)
    spectrogram += noise_coeff * np.abs(random_state.randn(*spectrogram.shape))
    residual += noise_coeff * np.abs(random_state.randn(*residual.shape))

    y = synthesis(fs, period, f0, spectrogram, residual, len(x))
    # NOTE(review): residual was modified in place above, so this "no noise"
    # signal still uses the noisy residual - confirm this is intended.
    ys = synthesis(fs, period, f0, old_spectrogram, residual, len(x))
    wavfile.write("y_%s.wav" % noise_str, fs, soundsc(y))
    wavfile.write("y_no_noise.wav", fs, soundsc(ys))

    plt.clf()
    plt.plot(soundsc(ys), label='orig')
    plt.plot(soundsc(y), label='noisy', color='red')
    plt.title("Comparison of time series with %s noise" % noise_str)
    plt.legend()
    plt.savefig("comparison_%s.png" % noise_str)
def array2audio(sDir, iRate, aData):
    """Write the array ``aData`` to the WAV file ``sDir`` at rate ``iRate``."""
    from scipy.io import wavfile

    wavfile.write(sDir, iRate, aData)
def _process_loop(self):
    """Run forever: wait for a buffer, optionally save it, and classify it.

    ``self._ask_data`` is set while the loop is ready for a new buffer and
    cleared while one is being processed. Once a buffer has been handled,
    ``self._process_buf`` is reset to None and the flag is set again.
    """
    with WavProcessor() as proc:
        # Signal that a first buffer can be supplied.
        self._ask_data.set()
        while True:
            if self._process_buf is None:
                # Waiting for data to process
                time.sleep(self._processor_sleep_time)
                continue

            # A buffer arrived: stop asking for data until it is handled.
            self._ask_data.clear()
            if self._save_path:
                # Keep a copy of the buffer, named after the current time.
                f_path = os.path.join(
                    self._save_path, 'record_{:.0f}.wav'.format(time.time())
                )
                wavfile.write(f_path, self._sample_rate, self._process_buf)
                logger.info('"{}" saved.'.format(f_path))

            logger.info('Start processing.')
            predictions = proc.get_predictions(
                self._sample_rate, self._process_buf)
            logger.info(
                'Predictions: {}'.format(format_predictions(predictions))
            )

            logger.info('Stop processing.')
            # Release the buffer first, then ask for the next one.
            self._process_buf = None
            self._ask_data.set()
def writewav(audiopath="wave.npz", outpath="out.wav"):
    """
    Write a wav file given an input sample array file that can be read with
    readfile.

    ``readfile(audiopath)`` must return a ``(samplerate, samples)`` pair,
    which is passed unchanged to ``scipy.io.wavfile.write``.

    audiopath: path of the sample array file to read.
    outpath: path of the WAV file to create.
    """
    import scipy.io.wavfile as wav
    samplerate, samples = readfile(audiopath)
    wav.write(outpath, samplerate, samples)
    return
def test_filter(n):
    """Filter ``signal-echo.wav`` with ``filters[n]`` and write the result.

    The first channel is convolved with the selected filter (``'same'``
    mode), the second channel is overwritten with a copy of the first, and
    the samples are written to ``signal-echo-out.wav`` at the input rate.
    """
    rate, samples = wave.read("signal-echo.wav")
    # Assigning into the column keeps the array's original sample type.
    samples[:, 0] = convolve(samples[:, 0], filters[n], 'same')
    samples[:, 1] = samples[:, 0]
    wave.write("signal-echo-out.wav", rate, samples)
def thumbnailWrapper(inputFile, thumbnailWrapperSize):
    """Extract two audio thumbnails from a file and plot its self-similarity.

    ``aS.musicThumbnailing`` supplies the time limits (seconds) of the two
    thumbnails and the self-similarity matrix. The thumbnails are written
    next to the input as ``<name>_thumb1<ext>`` and ``<name>_thumb2<ext>``,
    their limits are printed, and the matrix is shown with the best diagonal
    highlighted.

    Args:
        inputFile: path of the audio file.
        thumbnailWrapperSize: desired thumbnail size, forwarded to
            ``musicThumbnailing``.

    Raises:
        Exception: if ``inputFile`` does not exist.
    """
    st_window = 0.5
    st_step = 0.5
    if not os.path.isfile(inputFile):
        raise Exception("Input audio file not found!")

    [fs, x] = audioBasicIO.readAudioFile(inputFile)
    if fs == -1:    # could not read file
        return

    [A1, A2, B1, B2, Smatrix] = aS.musicThumbnailing(x, fs, st_window, st_step, thumbnailWrapperSize)

    # write thumbnailWrappers to WAV files:
    # Names are derived with splitext so that only the real extension is
    # touched (str.replace rewrote every ".wav"/".mp3" inside the path), and
    # any other extension falls back to ".wav" instead of leaving the names
    # undefined. ".mp3" inputs keep their historical ".mp3" output names even
    # though the content written is WAV data.
    base, ext = os.path.splitext(inputFile)
    out_ext = ext if ext in (".wav", ".mp3") else ".wav"
    thumbnailWrapperFileName1 = base + "_thumb1" + out_ext
    thumbnailWrapperFileName2 = base + "_thumb2" + out_ext
    wavfile.write(thumbnailWrapperFileName1, fs, x[int(fs * A1):int(fs * A2)])
    wavfile.write(thumbnailWrapperFileName2, fs, x[int(fs * B1):int(fs * B2)])
    print("1st thumbnailWrapper (stored in file {0:s}): {1:4.1f}sec" \
          " -- {2:4.1f}sec".format(thumbnailWrapperFileName1, A1, A2))
    print("2nd thumbnailWrapper (stored in file {0:s}): {1:4.1f}sec" \
          " -- {2:4.1f}sec".format(thumbnailWrapperFileName2, B1, B2))

    # Plot self-similarity matrix:
    fig = plt.figure()
    ax = fig.add_subplot(111, aspect="auto")
    plt.imshow(Smatrix)

    # Plot best-similarity diagonal:
    Xcenter = (A1 / st_step + A2 / st_step) / 2.0
    Ycenter = (B1 / st_step + B2 / st_step) / 2.0
    e1 = matplotlib.patches.Ellipse((Ycenter, Xcenter), thumbnailWrapperSize * 1.4, 3, angle=45, linewidth=3, fill=False)
    ax.add_patch(e1)

    # Dashed guide lines marking the limits of both thumbnails.
    plt.plot([B1/ st_step, Smatrix.shape[0]], [A1/ st_step, A1/ st_step], color="k", linestyle="--", linewidth=2)
    plt.plot([B2/ st_step, Smatrix.shape[0]], [A2/ st_step, A2/ st_step], color="k", linestyle="--", linewidth=2)
    plt.plot([B1/ st_step, B1/ st_step], [A1/ st_step, Smatrix.shape[0]], color="k", linestyle="--", linewidth=2)
    plt.plot([B2/ st_step, B2/ st_step], [A2/ st_step, Smatrix.shape[0]], color="k", linestyle="--", linewidth=2)

    plt.xlim([0, Smatrix.shape[0]])
    plt.ylim([Smatrix.shape[1], 0])

    ax.yaxis.set_label_position("right")
    ax.yaxis.tick_right()

    plt.xlabel("frame no")
    plt.ylabel("frame no")
    plt.title("Self-similarity matrix")

    plt.show()
def generate_from_image(filename, random_phases=False):
    """Synthesise audio from a spectrogram image and write it to disk.

    Every image row drives one sine oscillator and every image column covers
    ``SAMPLES_PER_WINDOW`` samples. The oscillators are weighted by
    ``amplitude(luminosity)``, summed, peak-normalised and written to
    ``output_signal.wav`` at 44100 Hz.

    Args:
        filename: path of the spectrogram image.
        random_phases: give each oscillator its own random phase instead of
            one shared random phase.
    """
    SAMPLES_PER_WINDOW = 326
    SAMPLING_RATE = 44100

    # mpimg reads in an image with an upside-down y-axis (i.e. 0 at the top
    # and max(y) at the bottom), so we need to flip it
    lum_spectrogram = np.flipud(luminosity(mpimg.imread(filename)))
    num_rows, num_cols = lum_spectrogram.shape
    num_samples = num_cols * SAMPLES_PER_WINDOW

    # Row vector of sample times and column vector of oscillator frequencies;
    # their matrix product below is the rows-by-samples phase grid.
    t = np.matrix(time(num_samples))
    row_positions = np.arange(num_rows) / float(num_rows)
    f = np.matrix(np.apply_along_axis(frequency, 0, row_positions)).transpose()

    if random_phases:
        phi = np.matrix(np.random.rand(num_rows) * 2 * np.pi).transpose()
    else:
        phi = np.random.random() * 2 * np.pi

    a = amplitude(lum_spectrogram)
    oscillators = np.multiply(a, np.sin(2 * np.pi * f * t + phi))

    mix = oscillators.sum(axis=0)
    mix = mix / np.amax(np.absolute(mix))
    signal = np.squeeze(np.asarray(mix))
    wavfile.write("output_signal.wav", SAMPLING_RATE, signal)
from pydub.playback import play

warnings.filterwarnings('ignore')

path = r'/Users/peterzuker/Desktop/Audio Modification/10047/model_input/spells/1/exemplars/1499777912068.wav'

# reload the audio to use librosa's expected format
lr_speech_data, lr_speech_rate = librosa.load(path)

# ``rate`` is passed by keyword: recent librosa releases make it
# keyword-only, and older ones accept the keyword as well.
stretched = librosa.effects.time_stretch(lr_speech_data, rate=1.47)

# Slow the clip down to a third of its speed with the phase vocoder.
y, sr = librosa.load(path)
D = librosa.stft(y, n_fft=2048, hop_length=512)
D_slow = librosa.phase_vocoder(D, rate=1. / 3, hop_length=512)
y_slow = librosa.istft(D_slow, hop_length=512)

# wavfile.write takes (filename, rate, data). The old call passed the
# time-domain signal as the rate and the complex STFT as the data.
wavfile.write('test.wav', sr, y_slow)

rate, data = wavfile.read(path)

sound = AudioSegment.from_file(path, format="wav")
play(sound)


def remove_silence(audio, threshold):
    """Return only the samples of ``audio`` whose magnitude exceeds ``threshold``."""
    # identify all samples with an absolute value greater than the threshold
    greater_index = numpy.greater(numpy.absolute(audio), threshold)
    # filter to only include the identified samples
    above_threshold_data = audio[greater_index]
    return above_threshold_data
def save_wav(self, wav, path):
    """Peak-normalise ``wav`` and write it to ``path`` as 16-bit PCM.

    The signal is scaled so that its peak maps to 32767. The peak is floored
    at 0.01 so that near-silent input is not amplified without bound.

    The scaling is applied to a new array. The previous in-place ``wav *=``
    altered the caller's array and failed outright for integer arrays.

    Args:
        wav: array of samples.
        path: destination file.
    """
    scaled = wav * (32767 / max(0.01, np.max(np.abs(wav))))
    wavfile.write(path, self.sample_rate, scaled.astype(np.int16))
hparams.quantize_channels - 1) elif is_mulaw(hparams.input_type): ref = P.inv_mulaw(ref, hparams.quantize_channels - 1) if hparams.postprocess is not None and hparams.postprocess not in [ "", "none" ]: ref = getattr(audio, hparams.postprocess)(ref) if hparams.global_gain_scale > 0: ref /= hparams.global_gain_scale # clip (just in case) gen = np.clip(gen, -1.0, 1.0) if has_ref_file: ref = np.clip(ref, -1.0, 1.0) wavfile.write(dst_wav_path, hparams.sample_rate, to_int16(gen)) if has_ref_file: wavfile.write(target_wav_path, hparams.sample_rate, to_int16(ref)) # log (TODO) if output_html and False: print(""" <audio controls="controls" > <source src="/{}/audio/{}/{}" autoplay/> Your browser does not support the audio element. </audio> """.format(hparams.name, dst_dir_name, basename(dst_wav_path))) print( "Finished! Check out {} for generated audio samples.".format(dst_dir))
import utilities as utl
from scipy.io import wavfile
import numpy as np

# Load the two source recordings as numpy arrays.
rate1, data1 = wavfile.read("13-84-1.wav")
rate2, data2 = wavfile.read("13-172-1.wav")

# Build two different linear mixtures with the mixSounds helper from
# utilities.py and convert them to 16-bit samples.
mixedX = utl.mixSounds([data1, data2], [0.3, 0.7]).astype(np.int16)
mixedY = utl.mixSounds([data1, data2], [0.6, 0.4]).astype(np.int16)

# Plot both mixtures.
utl.plotSounds(
    [mixedX, mixedY],
    ["mixed-1", "mixed-3"],
    rate1,
    "../plots/sounds/Ring_StarWars_mixed",
    False,
)

# Store each mixture as its own wav file, using the first file's rate.
for out_name, mixed in (("mixed-1.wav", mixedX), ("mixed-3.wav", mixedY)):
    wavfile.write(out_name, rate1, mixed)
print(test.shape)

#
# INFO: The test data will have two parts, X_test and y_test. X_test is
# going to be the first portion of the test audio file, which we will
# be providing the computer as input. y_test, the "label" if you will,
# is going to be the remaining portion of the audio file. That way,
# the computer will use linear regression to derive the missing
# portion of the sound file based on the training data it has received!

#
# Save the original 'test' clip, the one you're about to delete
# half of, so that you can compare it to the 'patched' clip once
# you've generated it. HINT: you should have got the sample_rate
# when you were loading up the .wav files:
wavfile.write('Original Test Clip.wav', sample_rate, test)

#
# TODO: Prepare the TEST data by creating a slice called X_test. It
# should have Provided_Portion * n_audio_samples audio sample features,
# taken from your test audio file, currently stored in the variable
# 'test'. In other words, grab the FIRST Provided_Portion *
# n_audio_samples audio features from test and store it in X_test. This
# should be accomplished using indexing.
#
# Slice bounds must be integers; the product is a float whenever
# Provided_Portion is a fraction, so it is truncated explicitly.
slice_size = int(Provided_Portion * n_audio_samples)
X_test = test[:slice_size]

#
def test_st_audio(self):
    """Test st.audio."""

    # Fake audio data: expect the resultant mimetype to be audio default.
    fake_audio_data = "\x11\x22\x33\x44\x55\x66".encode("utf-8")

    st.audio(fake_audio_data)

    el = self.get_delta_from_queue().new_element

    # locate resultant file in InMemoryFileManager and test its properties.
    file_id = _calculate_file_id(fake_audio_data, "audio/wav")
    self.assertTrue(file_id in in_memory_file_manager)

    afile = in_memory_file_manager.get(file_id)
    self.assertEqual(afile.mimetype, "audio/wav")
    self.assertEqual(afile.url, el.audio.url)

    # Test using generated data in a file-like object.

    sampleRate = 44100
    frequency = 440
    length = 5

    t = np.linspace(
        0, length, sampleRate * length
    )  # Produces a 5 second Audio-File
    y = np.sin(frequency * 2 * np.pi * t)  # Has frequency of 440Hz

    wavfile.write("test.wav", sampleRate, y)

    # The scratch file is removed even when an assertion below fails, so a
    # failing run does not leave test.wav behind in the working directory.
    try:
        with io.open("test.wav", "rb") as f:
            st.audio(f)

        el = self.get_delta_from_queue().new_element
        self.assertTrue(".wav" in el.audio.url)
    finally:
        os.remove("test.wav")

    # Test using a URL instead of data
    some_url = "https://www.soundhelix.com/examples/mp3/SoundHelix-Song-3.mp3"
    st.audio(some_url)

    el = self.get_delta_from_queue().new_element
    self.assertEqual(el.audio.url, some_url)

    # Test that a non-URL string is assumed to be a filename
    bad_filename = "blah"
    with self.assertRaises(FileNotFoundError):
        st.audio(bad_filename)

    # Test that we can use an empty/None value without error.
    st.audio(None)
    el = self.get_delta_from_queue().new_element
    self.assertEqual(el.audio.url, "")

    # Test that our other data types don't result in an error.
    st.audio(b"bytes_data")
    st.audio("str_data".encode("utf-8"))
    st.audio(BytesIO(b"bytesio_data"))
    st.audio(np.array([0, 1, 2, 3]))
def export(self, filename):
    """Write ``self.wave`` to ``filename`` as 16-bit PCM at ``self.rate``.

    The real part of the wave is rescaled so that its largest absolute
    sample maps to 32767. An all-zero wave is written as silence.
    """
    # NumPy is imported locally: the ``sp.real`` / ``sp.int16`` aliases the
    # old code relied on are deprecated or removed in current SciPy.
    import numpy as np

    samples = np.real(self.wave)
    # Normalise by the absolute peak. The signed maximum overflows int16
    # when the negative peak is the larger one, and flips or zero-divides
    # when the wave has no positive sample.
    peak = np.absolute(samples).max()
    if peak == 0:
        scaled = np.zeros(samples.shape, dtype=np.int16)
    else:
        # 32767 is the int16 full scale (the previous 32676 was a typo).
        scaled = np.int16(samples * 32767. / peak)
    wavfile.write(filename, self.rate, scaled)
marker='o') plt.legend() plt.tight_layout(pad=0.5) ## GUI if not args.gui: plt.show() else: plt.show(block=False) if args.save: from scipy.io import wavfile wavfile.write( 'bss_iva_mix.wav', room.fs, pra.normalize(mics_signals[0, :], bits=16).astype(np.int16)) for i, sig in enumerate(y): wavfile.write('bss_iva_source{}.wav'.format(i + 1), room.fs, pra.normalize(sig, bits=16).astype(np.int16)) if args.gui: # Make a simple GUI to listen to the separated samples from tkinter import Tk, Button, Label import sounddevice as sd # Now comes the GUI part class PlaySoundGUI(object): def __init__(self, master, fs, mix, sources): self.master = master
spec_thresh = 4 # threshold for spectrograms (lower filters out more noise) lowcut = 500 # Hz # Low cut for our butter bandpass filter highcut = 15000 # Hz # High cut for our butter bandpass filter # For mels n_mel_freq_components = 64 # number of mel frequency channels shorten_factor = 10 # how much should we compress the x-axis (time) start_freq = 300 # Hz # What frequency to start sampling our melS from end_freq = 8000 # Hz # What frequency to stop sampling our melS from # Grab your wav and filter it mywav = "sound01.wav" rate, data = wavfile.read(mywav) data = data[:,0] # Only take one channel wavfile.write("audio_in_out01.wav", rate, data) data = butter_bandpass_filter(data, lowcut, highcut, rate, order=1) # Only use a short clip for our demo if np.shape(data)[0] / float(rate) > 10: data = data[0 : rate * 10] print("Length in time (s): ", np.shape(data)[0] / float(rate)) wav_spectrogram = pretty_spectrogram( data.astype("float64"), fft_size=fft_size, step_size=step_size, log=True, thresh=spec_thresh,
def write(self, filename):
    """Write ``self.full_sample`` to ``filename`` at a fixed rate of 44100.

    NOTE(review): the inner ``write`` is presumably the module-level
    ``scipy.io.wavfile.write`` rather than this method - confirm against
    the file's imports.
    """
    write(filename, 44100, self.full_sample)
def logMMSE(inputFilePath, outputFilePath):
    """Denoise a WAV file with a log-MMSE spectral estimator and write the result.

    The input is processed in 20 ms Hann-windowed frames with 50 % overlap.
    The noise spectrum is initialised from the first six frames (assumed to
    be noise / silence) and then tracked by a simple voice-activity decision.

    Parameters
    ----------
    inputFilePath : str
        WAV file to read (opened memory-mapped). Samples are divided by
        32768, so 16-bit PCM input is presumably expected -- TODO confirm.
    outputFilePath : str
        Destination WAV file; written at the input sample rate as a float64
        column vector.

    Raises
    ------
    ValueError
        If the input is shorter than the six frames needed for the initial
        noise estimate.
    """
    sample_rate, sample_data = wavfile.read(inputFilePath, True)  # True -> mmap

    # Frame size in samples (20 ms), forced to an even length.
    frame_len = int(np.floor(20 * sample_rate * 0.001))
    if frame_len % 2 == 1:
        frame_len += 1

    # Window overlap in percent of frame size.
    perc = 50
    len1 = int(np.floor(frame_len * perc * 0.01))  # overlapped part of a frame
    len2 = frame_len - len1                        # hop size

    n_noise_frames = 6
    if sample_data.shape[0] < n_noise_frames * frame_len:
        raise ValueError(
            "input must contain at least %d samples for the initial noise "
            "estimate" % (n_noise_frames * frame_len))

    win = np.hanning(frame_len)
    win = win * len2 / np.sum(win)

    # Noise magnitude calculations - assuming that the first 6 frames are
    # noise / silence.
    nFFT = frame_len << 2
    norm_scale = 2 << 14  # 32768
    noise_mean = np.zeros([nFFT, 1])
    for frame_idx in range(n_noise_frames):
        start = frame_idx * frame_len
        batch = sample_data[start:start + frame_len] / norm_scale
        spectrum = np.fft.fft(win * batch, nFFT)
        noise_mean += np.abs(spectrum)[:, np.newaxis]
    noise_mu = np.square(noise_mean / n_noise_frames)

    # Allocate memory and initialize various variables.
    x_old = np.zeros([len1, 1])
    Nframes = int(np.floor(sample_data.shape[0] / len2)
                  - np.floor(frame_len / len2))
    xfinal = np.zeros([Nframes * len2, 1])

    # Start processing.
    k = 0
    aa = 0.98    # smoothing factor of the a priori SNR estimate
    mu = 0.98    # smoothing factor of the noise spectrum update
    eta = 0.15   # VAD threshold
    ksi_min = 10**(-25 * 0.1)
    Xk_prev = None

    for n in range(Nframes):
        batch = sample_data[k:k + frame_len] / norm_scale
        # Work on column vectors throughout so every array is (nFFT, 1).
        spec = np.fft.fft(win * batch, nFFT)[:, np.newaxis]

        # Compute the magnitude.
        sig = np.abs(spec)
        sig2 = sig**2

        # Limit post SNR to avoid overflows.
        gammak = np.minimum(sig2 / noise_mu, 40)
        excess = np.maximum(gammak - 1, 0)

        if n == 0:
            ksi = aa + (1 - aa) * excess
        else:
            # A priori SNR, limited to -25 dB.
            ksi = aa * Xk_prev / noise_mu + (1 - aa) * excess
            ksi = np.maximum(ksi, ksi_min)

        log_sigma_k = gammak * ksi / (1 + ksi) - np.log(1 + ksi)
        vad_decision = np.sum(log_sigma_k) / frame_len

        # Noise-only frame found: update the noise spectrum.
        if vad_decision < eta:
            noise_mu = mu * noise_mu + (1 - mu) * sig2

        # Log-MMSE estimator.
        A = ksi / (1 + ksi)
        vk = A * gammak
        ei_vk = 0.5 * expn(1, vk)
        hw = A * np.exp(ei_vk)

        Xk_prev = (sig * hw)**2

        xi_w = np.real(np.fft.ifft(hw * spec, nFFT, axis=0))

        # Overlap-add: finish the previous frame's tail, keep this one's.
        xfinal[k:k + len2] = x_old + xi_w[0:len1]
        x_old = xi_w[len1:frame_len]

        k += len2

    wavfile.write(outputFilePath, sample_rate, xfinal)
chunks.append([chunks[-1][1], audioFrameCount, shouldIncludeFrame[i - 1]]) chunks = chunks[1:] outputAudioData = np.zeros((0, audioData.shape[1])) outputPointer = 0 lastExistingFrame = None for chunk in chunks: audioChunk = audioData[int(chunk[0] * samplesPerFrame):int(chunk[1] * samplesPerFrame)] sFile = TEMP_FOLDER + "/" + TEMP_START_FILE_NAME eFile = TEMP_FOLDER + "/" + TEMP_END_FILE_NAME wavfile.write(sFile, SAMPLE_RATE, audioChunk) with WavReader(sFile) as reader: with WavWriter(eFile, reader.channels, reader.samplerate) as writer: tsm = phasevocoder(reader.channels, speed=NEW_SPEED[int(chunk[2])]) tsm.run(reader, writer) _, alteredAudioData = wavfile.read(eFile) leng = alteredAudioData.shape[0] endPointer = outputPointer + leng outputAudioData = np.concatenate( (outputAudioData, alteredAudioData / maxAudioVolume)) #outputAudioData[outputPointer:endPointer] = alteredAudioData/maxAudioVolume # smooth out transitiion's audio by quickly fading in/out if leng < AUDIO_FADE_ENVELOPE_SIZE:
prev_w = w_s cond = c_mb print("Completed sampling after %i steps" % fixed_steps) completed = np.array(completed).transpose(1, 0, 2) rlookup = {v: k for k, v in vocabulary.items()} all_strings = [] for yi in y: ex_str = "".join([rlookup[c] for c in np.argmax(yi, axis=1)]) all_strings.append(ex_str) for i in range(len(completed)): ex = completed[i] ex_str = "".join( [rlookup[c] for c in np.argmax(cond[:, i], axis=1)]) s = "gen_%s_%i.wav" % (ex_str, i) ii = reconstruct(ex) wavfile.write(s, fs, soundsc(ii)) if ex_str in all_strings: inds = [ n for n, s in enumerate(all_strings) if ex_str == s ] ind = inds[0] it = reconstruct(X[ind]) s = "orig_%s_%i.wav" % (ex_str, i) wavfile.write(s, fs, soundsc(it)) valid_itr.reset() print("Sampling complete, exiting...") sys.exit() else: print("No plotting arguments, starting training mode!") X_sym = tensor.tensor3("X_sym")
# Carrier frequencies passed to generate_signal / decode_signal for the
# R, G and B components.
f_R = 6000
f_G = 8000
f_B = 10000
# Sampling frequency handed to generate_signal and decode_signal.
fs = 40000
# Periods per bit
periods = 2000
# Generate signal
signal = generate_signal(word,
                         image_path,
                         f_word,
                         f_R,
                         f_G,
                         f_B,
                         fs,
                         periods,
                         repetitions=3,
                         sync_repetitions=3,
                         estimation_repetitions=3,
                         inter_repetition_periods=1)
# Prepend 800 zero samples of silence.
signal = np.append(np.zeros(800), signal)
# Peak-normalize to [-1, 1] and store as float32.
scaled = np.float32((signal / np.max(np.abs(signal))))
# NOTE(review): the file is written with a 44100 Hz header although the
# signal was generated with fs = 40000 -- confirm this mismatch is intended.
wav.write('test.wav', 44100, scaled)
# NOTE(review): absolute, machine-specific path; the recording's own rate
# (fs1) is read but decode_signal is still given fs -- confirm.
fs1, test = wav.read(
    r'C:\Users\dudam\PycharmProjects\principios\grabacion2.wav')
word = decode_signal(test, f_word, f_R, f_G, f_B, fs)
import recombination
import genetic_algorithm
import objective_function

# Parameters that can be tuned to your liking
input_dir = 'inputs'
output_dir = 'outputs'
desired_chord = 'C_MAJOR'
desired_scale = 'C_MAJOR'
desired_generations = 1
desired_crossover_points = 5
desired_mutations = 10
desired_prob = 50

if __name__ == '__main__':
    # Build the initial population from a random half of the input files,
    # each reduced to its first channel and peak-normalized to [-1, 1].
    file_list = listdir(input_dir)
    init_pop = []
    chosen_files = random.sample(file_list, len(file_list) // 2)
    for file_name in chosen_files:
        dat = wavfile.read(input_dir + '/' + file_name)[1]
        dat_mono = dat if dat.ndim == 1 else dat.T[0]
        # Convert before taking abs(): np.abs on int16 wraps -32768 back to
        # -32768, which would under-estimate the peak.
        dat_mono = dat_mono.astype(np.float64)
        loudest = np.amax(np.abs(dat_mono)) if dat_mono.size else 0.0
        if loudest > 0:
            dat_mono = dat_mono / loudest
        # A silent (all-zero) file is kept as-is instead of becoming NaN.
        init_pop.append(np.float32(dat_mono))

    pop = genetic_algorithm.evolve(init_pop, desired_generations,
                                   desired_chord, desired_scale)

    # Write every evolved individual as a 44.1 kHz WAV file.
    for i, individual in enumerate(pop):
        wavfile.write(output_dir + '/result_' + str(i) + '.wav', 44100,
                      individual)
arpabet_dict))[None, :].cuda() pitch_contour = dataloader[file_idx][3][None].cuda() mel = load_mel(audio_path) print(audio_path, text) # load source data to obtain rhythm using tacotron 2 as a forced aligner x, y = mellotron.parse_batch(datacollate([dataloader[file_idx]])) ipd.Audio(audio_path, rate=hparams.sampling_rate) with torch.no_grad(): # get rhythm (alignment map) using tacotron 2 _, _, _, rhythm = mellotron.forward(x) rhythm = rhythm.permute(1, 0, 2) speaker_id = torch.LongTensor([1]).cuda() with torch.no_grad(): mel_outputs, mel_outputs_postnet, gate_outputs, _ = mellotron.inference_noattention( (text_encoded, mel, speaker_id, lang_code, pitch_contour, rhythm)) plot_mel_f0_alignment(x[2].data.cpu().numpy()[0], mel_outputs_postnet.data.cpu().numpy()[0], pitch_contour.data.cpu().numpy()[0, 0], rhythm.data.cpu().numpy()[:, 0].T) with torch.no_grad(): audio = denoiser(waveglow.infer(mel_outputs_postnet, sigma=0.8), 0.01)[:, 0] audio = audio.cpu().numpy() audio = audio / np.max(np.abs(audio)) write("{} {}.wav".format(str(file_idx), speaker_id.item()), hparams.sampling_rate, audio)
def evaluate(model,
             musdb_path,
             eval_folder,
             workers=2,
             device="cpu",
             rank=0,
             save=False,
             shifts=0,
             split=False,
             overlap=0.25,
             is_wav=False,
             world_size=1):
    """
    Evaluate model using museval. Run the model
    on a single GPU, the bottleneck being the call to museval.

    For every test track handled by this rank (``rank``, ``rank + world_size``,
    ...), the mixture is normalized, separated with ``apply_model``, scored
    with ``museval.evaluate`` and the scores are stored as
    ``<eval_folder>/results/test/<track>.json.gz``. Tracks whose JSON file
    already exists are skipped. With ``save=True`` the estimates are also
    written to ``<eval_folder>/wav/test/<track>/<source>.wav``.

    ``workers`` is the size of the process pool running museval; a falsy
    value runs museval inline. ``shifts``, ``split`` and ``overlap`` are
    forwarded to ``apply_model``.
    """
    output_dir = eval_folder / "results"
    output_dir.mkdir(exist_ok=True, parents=True)
    json_folder = eval_folder / "results/test"
    json_folder.mkdir(exist_ok=True, parents=True)

    # we load tracks from the original musdb set
    test_set = musdb.DB(musdb_path, subsets=["test"], is_wav=is_wav)
    src_rate = 44100  # hardcoded for now...

    # Inference only: freeze the parameters and drop any stored gradients.
    for p in model.parameters():
        p.requires_grad = False
        p.grad = None

    pendings = []
    with futures.ProcessPoolExecutor(workers or 1) as pool:
        for index in tqdm.tqdm(range(rank, len(test_set), world_size),
                               file=sys.stdout):
            track = test_set.tracks[index]

            out = json_folder / f"{track.name}.json.gz"
            if out.exists():
                continue

            mix = th.from_numpy(track.audio).t().float()
            ref = mix.mean(dim=0)  # mono mixture
            mix = (mix - ref.mean()) / ref.std()
            mix = convert_audio(mix, src_rate, model.samplerate,
                                model.audio_channels)
            estimates = apply_model(model,
                                    mix.to(device),
                                    shifts=shifts,
                                    split=split,
                                    overlap=overlap)
            # Undo the normalization applied to the mixture.
            estimates = estimates * ref.std() + ref.mean()

            estimates = estimates.transpose(1, 2)
            references = th.stack([
                th.from_numpy(track.targets[name].audio).t()
                for name in model.sources
            ])
            references = convert_audio(references, src_rate, model.samplerate,
                                       model.audio_channels)
            references = references.transpose(1, 2).numpy()
            estimates = estimates.cpu().numpy()
            win = int(1. * model.samplerate)
            hop = int(1. * model.samplerate)
            if save:
                folder = eval_folder / "wav/test" / track.name
                folder.mkdir(exist_ok=True, parents=True)
                for name, estimate in zip(model.sources, estimates):
                    # The estimates were produced at model.samplerate (see
                    # convert_audio above), so label the file with that rate
                    # rather than a hard-coded 44100.
                    wavfile.write(str(folder / (name + ".wav")),
                                  model.samplerate, estimate)

            if workers:
                pendings.append((track.name,
                                 pool.submit(museval.evaluate,
                                             references,
                                             estimates,
                                             win=win,
                                             hop=hop)))
            else:
                pendings.append((track.name,
                                 museval.evaluate(references,
                                                  estimates,
                                                  win=win,
                                                  hop=hop)))
            # Release the large per-track arrays before loading the next track.
            del references, mix, estimates, track

        for track_name, pending in tqdm.tqdm(pendings, file=sys.stdout):
            if workers:
                pending = pending.result()
            sdr, isr, sir, sar = pending
            # NOTE(review): win/hop are hard-coded to 44100 here while the
            # scoring above used model.samplerate -- confirm this is intended.
            track_store = museval.TrackStore(win=44100,
                                             hop=44100,
                                             track_name=track_name)
            for idx, target in enumerate(model.sources):
                values = {
                    "SDR": sdr[idx].tolist(),
                    "SIR": sir[idx].tolist(),
                    "ISR": isr[idx].tolist(),
                    "SAR": sar[idx].tolist()
                }
                track_store.add_target(target_name=target, values=values)

            # Write the finished store once, closing the gzip handle
            # deterministically instead of leaking it to the GC.
            json_path = json_folder / f"{track_name}.json.gz"
            with gzip.open(json_path, "w") as json_file:
                json_file.write(track_store.json.encode('utf-8'))

    if world_size > 1:
        distributed.barrier()
import scipy.io.wavfile as wav
import matplotlib.pyplot as plt
import numpy as np
import sounddevice as sd

filename = 'worksample.wav'

# RECORDING THE FILE
fs = 44100  # Sample rate
seconds = 3  # Recording duration
print('Gravação em Andamento...')
myrecording = sd.rec(int(seconds * fs), samplerate=fs, channels=1)
sd.wait()  # Wait until the recording has finished
print('Gravação completa')
wav.write(filename, fs, myrecording)  # Save as a WAV file

# FFT AND PLOTS
samplerate, data = wav.read(
    filename)  # Read the sample rate and the samples back from the file
nf = 16384
Y = np.fft.fft(data, nf)  # Apply an nf-point FFT to the audio

# Normalize the FFT output: magnitude of the first nf/2 + 1 bins,
# min-max scaled to [0, 1]
ynorm = np.abs(Y[0:round(nf / 2 + 1)])
ynorm = (ynorm - np.min(ynorm)) / (np.max(ynorm) - np.min(ynorm))
# Frequency axis from 0 to samplerate / 2 with one point per kept bin
f = samplerate / 2 * np.linspace(0, 1, round(nf / 2 + 1))

# Plot the transform and the audio.
plt.figure(1)
def write_wav(signal, filename, overwrite=True):
    """
    Write a signal as a WAV file.

    Parameters
    ----------
    signal : Signal object
        An audio signal object from the pyfar Signal class.
    filename : string or path-like
        Output wav file. The suffix is replaced by (or set to) ``.wav``.
    overwrite : bool
        Select whether to overwrite the WAV file, if it already exists.
        The default is True.

    Raises
    ------
    FileExistsError
        If ``overwrite`` is False and the target file already exists.

    Notes
    -----
    * This function is based on scipy.io.wavfile.write().
    * Writes a simple uncompressed WAV file.
    * Signals of shape larger than 1D are flattened (a warning is issued).
    * The bits-per-sample and PCM/float will be determined by the data-type.

    Common data types: [1]_

    =====================  ===========  ===========  =============
         WAV format            Min          Max       NumPy dtype
    =====================  ===========  ===========  =============
    32-bit floating-point  -1.0         +1.0         float32
    32-bit PCM             -2147483648  +2147483647  int32
    16-bit PCM             -32768       +32767       int16
    8-bit PCM              0            255          uint8
    =====================  ===========  ===========  =============

    Note that 8-bit PCM is unsigned.

    References
    ----------
    .. [1] IBM Corporation and Microsoft Corporation, "Multimedia Programming
       Interface and Data Specifications 1.0", section "Data Format of the
       Samples", August 1991
       http://www.tactilemedia.com/info/MCI_Control_Info.html

    """
    # Collapse every channel dimension into one: (channels, samples).
    samples = signal.time
    channel_rows = samples.reshape(-1, samples.shape[-1])

    if len(signal.cshape) != 1:
        warnings.warn(f"Signal flattened to {channel_rows.shape[0]} channels.")

    # Force the .wav file extension.
    target = pathlib.Path(filename).with_suffix('.wav')

    # Refuse to clobber an existing file unless overwriting is allowed.
    if overwrite is False and os.path.isfile(target):
        raise FileExistsError("File already exists,"
                              "use overwrite option to disable error.")

    # wavfile.write expects (samples, channels), hence the transpose.
    wavfile.write(target, signal.sampling_rate, channel_rows.T)
simplest_possible_plot(timelimited_signal_data_normalized, "timelimited_normalized_signal_data.png") # Process a convolution on signal data using impulse response convolved = signal.convolve( timelimited_signal_data_normalized, timelimited_impulse_response_data, ) # Normalize convolved data convolved_normalized = normalize_np_array(convolved) simplest_possible_plot(convolved_normalized, "normalized_convolved.png") # Save normalized convolved data as wav file. wavfile.write(convolved_signal_file_path, signal_fs, convolved_normalized) # Read convolved data from wav file. It is simulation of real-life usage when we have convoluted signal and # impulse response, not pure input signal. convolved_fs, read_convolved_data = wavfile.read( convolved_signal_file_path) read_convolved_data_normalized = normalize_np_array(read_convolved_data) simplest_possible_plot(read_convolved_data_normalized, "normalized_read_convolved_data.png") # Deconvolve read convolved signal with given impulse response. recovered, remainder = signal.deconvolve( read_convolved_data_normalized, timelimited_impulse_response_data)
def save_wave(self, path, wave):
    """Convert ``wave`` with ``self.to_int`` and write it to ``path`` as a WAV file.

    The file is written at ``self.sampling_rate`` Hz.
    """
    pcm = self.to_int(wave)
    wavfile.write(path, self.sampling_rate, pcm)
# numpy.array(raw_video, dtype="float32") # a_norm = np.linalg.norm(raw_video) # raw_vv = raw_video/a_norm # print(raw_vv) # print(max(raw_vv)) # print(min(raw_vv)) # print(max(raw_video)) # print(min(raw_video)) samplerate = 48000 # fs = 100 # t = np.linspace(0., 1., samplerate) # amplitude = np.iinfo(np.int16).max # data = amplitude * np.sin(2. * np.pi * fs * t) raw2_name = raw_list[j][:-4] + '.wav' nn = join(v_path, raw2_name) write(nn, samplerate, raw_video.astype(np.float32)) # print(raw_video) # # np.save(raw2_name, raw_video) # print("save .npy done") # # if k in range(6): # # break # # ag_video = get_ag_video(raw_video, k) # ag_video = get_ag_video(raw_video, k+1) # # save audio # sampleRate = 48000 # hertz # w_name1 = join(c_path, ag_list[k]) # w_name2 = raw_list[j][:-4] + '_' + ag_list[k] + '.wav' # w_name = join(w_name1, w_name2) # # print(w_name) # obj = wave.open(w_name, 'w')
def write(self, file_name):
    """Write ``self.samples`` to ``file_name`` as a WAV file at ``self.rate`` Hz."""
    rate, samples = self.rate, self.samples
    wav.write(file_name, rate, samples)
def separateHP(filename, winlength, alpha=0.5, gamma=0.4):
    """Split an audio file into harmonic and percussive parts and save both.

    Reads ``filename``, builds a range-compressed power spectrogram ``W``,
    splits it into a harmonic part ``H`` and a percussive part ``P``,
    binarizes the split, plots both, converts them back to waveforms and
    writes ``<name>-harmonics.wav`` and ``<name>-percussive.wav``, where
    ``<name>`` is ``filename`` without its last four characters. Finally an
    SNR-like figure is printed.

    Parameters
    ----------
    filename : str
        Input audio file. Presumably mono -- TODO confirm.
    winlength : number
        Analysis window length in milliseconds.
    alpha : float
        Weight of the harmonic (time-direction) term against the percussive
        (frequency-direction) term in the update.
    gamma : float
        Range-compression exponent; ``W = |F| ** (2 * gamma)``.
    """
    # Open and read the file
    fs, y = read(filename)

    # Calculate the STFT i.e. transform to time-frequency domain
    winlen = int(float(winlength) / 1000.0 * float(fs))  # ms -> samples
    hoplen = int(winlen / 2)  # 50 % overlap
    y = y / np.max(np.abs(y))  # peak-normalize the input
    F = stft(y, n_fft=winlen, hop_length=hoplen, window=windows.hann)

    # Calculate range-compressed version of power spectrogram
    W = np.power(np.abs(F), 2 * gamma)

    # Initialize percussion and harmonic values: equal split of W
    H = W / 2
    P = W / 2

    # Length of transform timewise (number of frames)
    K = np.size(W, 1)

    # Update P & H
    # NOTE(review): for i == 0 the index i - 1 is -1, which NumPy resolves to
    # the LAST column -- confirm whether the loop should start at 1.
    for i in range(0, K - 1):
        # Calculate delta matrix which describes energy changes between frequencies
        # NOTE(review): assumes the spectrogram has hoplen + 1 rows -- confirm.
        delta = np.zeros([hoplen + 1, 1])
        for h in range(1, hoplen):
            delta[h, 0] = alpha * (
                (H[h, i - 1] - 2 * H[h, i] + H[h, i + 1]) / 4) - (1 - alpha) * (
                    (P[h - 1, i] - 2 * P[h, i] + P[h + 1, i]) / 4)
        # Clamp the updated harmonic column to [0, W]; P takes the remainder.
        H[:, i + 1] = np.minimum(np.maximum(np.add(H[:, i], delta[:, 0]), 0),
                                 W[:, i + 1])
        P[:, i + 1] = np.subtract(W[:, i + 1], H[:, i + 1])

    # Binarize the separation: each bin goes entirely to H or to P
    # NOTE(review): the comparison reads column i - 1 while column i is
    # written, and the last row / last column are never visited -- confirm.
    for i in range(0, K - 1):
        for h in range(0, hoplen):
            if H[h, i - 1] < P[h, i - 1]:
                H[h, i] = 0
                P[h, i] = W[h, i]
            else:
                H[h, i] = W[h, i]
                P[h, i] = 0

    # Visualize P & H
    plt.figure(figsize=[15, 5])
    plt.subplot(1, 2, 1)
    #specshow(20*np.log10(1e-10+np.abs(H)), y_axis='log', x_axis='time',sr=fs,hop_length=hoplen)
    specshow(H, y_axis='linear', x_axis='time', sr=fs, hop_length=hoplen)
    plt.title('H')
    #plt.pcolormesh(H)
    plt.subplot(1, 2, 2)
    #specshow(20*np.log10(1e-10+np.abs(P)), y_axis='log', x_axis='time',sr=fs,hop_length=hoplen)
    specshow(P, y_axis='linear', x_axis='time', sr=fs, hop_length=hoplen)
    plt.title('P')
    #plt.pcolormesh(P)
    plt.show()

    # Convert into waveform: undo the range compression and reuse the phase
    # of the original STFT
    h = istft(np.power(H, 1 / (2 * gamma)) * np.exp(1j * np.angle(F)),
              win_length=winlen,
              hop_length=hoplen,
              window=windows.hann)
    p = istft(np.power(P, 1 / (2 * gamma)) * np.exp(1j * np.angle(F)),
              win_length=winlen,
              hop_length=hoplen,
              window=windows.hann)
    # Peak-normalize both outputs
    h = h / np.max(np.abs(h))
    p = p / np.max(np.abs(p))

    # Save harmonic and percussive audiofiles
    audio_name = '%s-harmonics.wav' % (filename[:-4])
    write(audio_name, fs, h)
    audio_name = '%s-percussive.wav' % (filename[:-4])
    write(audio_name, fs, p)

    # NOTE(review): this ratio is built from plain sample sums, not from
    # signal energies -- confirm it is the intended SNR definition.
    SNR = 20 * np.log10(
        np.abs((np.sum(y)) / (np.sum(y) - np.sum(p) - np.sum(h))))
    print("SNR for signal p:", SNR, "dB")
with torch.no_grad(): audio = model.inference(src_seq, src_pos, mel_max_len, alignment_target, sigma=1.0, alpha=1.0) audio = audio * MAX_WAV_VALUE audio = audio.squeeze() print(torch.mean(audio)) audio = audio.cpu().numpy() audio_tgt = torch.cat(audio_tgt) audio_tgt = audio_tgt * MAX_WAV_VALUE print(torch.mean(audio_tgt)) #print (audio_tgt) audio_tgt = audio_tgt.squeeze() audio_tgt = audio_tgt.cpu().numpy() audio = audio.astype('int16') audio_tgt = audio_tgt.astype('int16') audio_path = os.path.join("results", "test_{}_synthesis.wav".format(i)) '''audio_tgt_path = os.path.join( "results", "{}_tgt.wav".format(i))''' write(audio_path, sampling_rate, audio) # write(audio_tgt_path, sampling_rate, audio_tgt) if i >= 10: break
def microphone():
    """Record stereo audio from the input device and save it as a WAV file.

    The duration comes from the module-level ``microphone_time`` and the
    target path is ``file_path + extend + audio_info``; the recording is made
    and written at 44100 Hz.
    """
    sample_rate = 44100
    n_frames = int(microphone_time * sample_rate)
    recording = sd.rec(n_frames, samplerate=sample_rate, channels=2)
    sd.wait()  # block until the recording has finished
    target = file_path + extend + audio_info
    write(target, sample_rate, recording)
tsm.run(reader, writer) spedupAudio = writer.output yPointerEnd = yPointer + spedupAudio.shape[0] y[yPointer : yPointerEnd] = spedupAudio yPointer = yPointerEnd writeFrames(frameBuffer, yPointerEnd, silentSpeed, sampleRate, out) frameBuffer = [] switchStart = switchEnd normal = 1 if skipped % 1000 == 0: print("{} frames inspected".format(skipped)) skipped += 1 y = y[:yPointer] wavfile.write("spedupAudio.wav", sampleRate, y) cap.release() out.release() cv2.destroyAllWindows() mergeCommand = "ffmpeg -i spedup.mp4 -i spedupAudio.wav -c:v h264_nvenc -threads 0 out_{}".format(videoFile) error = subprocess.call(mergeCommand, shell=True) if error == 0: removeCommand = "rm output.wav spedup.mp4 spedupAudio.wav" subprocess.call(removeCommand, shell=True)
#plt.plot(wav_data)

### Filters audio data ###
# 4th-order Butterworth band-pass, 60 Hz - 6 kHz, applied forward and
# backward (filtfilt). Cut-offs are normalized by the Nyquist frequency fs / 2.
low_cutoff = 60
high_cutoff = 6000
wn = [low_cutoff / (fs / 2), high_cutoff / (fs / 2)]
b, a = dsp.butter(4, wn, 'band')
wav_data_filt = dsp.filtfilt(b, a, wav_data)
#plt.plot(wav_data_filt)

### Resamples audio to 12 kHz ###
# The sample count is divided by 4, so this yields 12 kHz only for a
# 48 kHz input -- NOTE(review): confirm fs is always 48000 here.
wav_data_12kHz = dsp.resample(wav_data_filt,
                              int(wav_data_filt.shape[0] / 4))
#plt.plot(wav_data_12kHz)

### Splits data into N chunks ###
# N is the chunk length (a tenth of the signal); a trailing shorter chunk is
# produced when the length is not a multiple of 10.
N = int(len(wav_data_12kHz) / 10)
n = 1
for i in range(0, len(wav_data_12kHz), N):
    temp_wav_data = wav_data_12kHz[i:i + N]
    # Scale to the 16-bit range (assumes samples in [-1, 1] -- TODO confirm),
    # clip filter/resampler overshoot and actually keep the int16 conversion:
    # ndarray.astype returns a new array, so its result must be assigned.
    temp_wav_data = np.clip(temp_wav_data * 32767, -32768, 32767)
    temp_wav_data = temp_wav_data.astype(np.int16)
    newWavName = file[:-7] + 'feat_' + str(n) + '.wav'
    wavfile.write('./processedRecordings/' + newWavName, 12000,
                  temp_wav_data)
    n += 1
    #plt.plot(temp_wav_data)
    #print(i)
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

# Commentary:
# - Analysis and resynthesis of speech with PyWORLD

from scipy.io import wavfile
import numpy as np
import pyworld

IN_WAVE_FILE = "in.wav"  # input speech
OUT_WAVE_FILE = "out.wav"  # analysis-resynthesized speech

# Load the audio
fs, x = wavfile.read(IN_WAVE_FILE)
x = x.astype(np.float64)

# Analyze the audio (fundamental frequency, spectral envelope, aperiodicity)
f0, sp, ap = pyworld.wav2world(x, fs)

# Resynthesize the audio
y = pyworld.synthesize(f0, sp, ap, fs)

# Clip to the int16 range before casting: resynthesis can overshoot the
# input's peak, and an unclipped cast wraps around into loud artifacts.
int16_range = np.iinfo(np.int16)
y = np.clip(y, int16_range.min, int16_range.max).astype(np.int16)

# Save to a wav file
wavfile.write(OUT_WAVE_FILE, fs, y)
def write_audio_file(filename, data, sr=44100):
    """Write ``data`` to ``filename`` as a WAV file at ``sr`` Hz (default 44100).

    Returns None.
    """
    # Alternatives kept for reference:
    #librosa.output.write_wav(filename, data, sr)
    #torchaudio.save(filename, torch.Tensor(data).unsqueeze(1), sr)
    wavfile.write(filename, rate=sr, data=data)