def run(self):
    """Count how many audio files in two fixed folders wavio can read.

    Walks every sub-directory of the hard-coded speech and song
    directories, tries ``wavio.read`` on each file, and prints the number
    of files that were read successfully ("Good") and the number that
    raised an error ("Bad").
    """
    # Raw strings keep the Windows backslashes literal instead of relying
    # on unrecognised escape sequences such as "\D" and "\A".
    speech_dir = r'E:\alon_emanuel_drive\Downloads\Audio_Speech_Actors_01-24'
    song_dir = r'E:\alon_emanuel_drive\Downloads\Audio_Song_Actors_01-24'
    good = 0
    bad = 0
    for base_dir in (speech_dir, song_dir):
        for sub_dir in os.listdir(base_dir):
            for name in tqdm(os.listdir(os.path.join(base_dir, sub_dir))):
                path = os.path.join(base_dir, sub_dir, name)
                try:
                    wavio.read(path)
                except Exception:
                    # Any read error counts as a bad file; KeyboardInterrupt
                    # and SystemExit still propagate so the scan can be
                    # aborted.
                    bad += 1
                else:
                    good += 1
    print(f'Good: {good}')
    print(f'Bad: {bad}')
def __init__(self, file, wind=400):
    """Load a WAV file and precompute its envelope and magnitude spectrum.

    Args:
        file: Path of the WAV file to load.
        wind: Window argument forwarded to ``windowed_avg``.
    """
    self.directory = file
    self.sound = sa.WaveObject.from_wave_file(file)
    # Parse the file once and reuse the result for both rate and samples.
    wav = wavio.read(file)
    self.rate = wav.rate
    self._raw = np.ravel(wav.data)
    self.data = normalize_level(self._raw)
    self.envelope = windowed_avg(self.data, wind)
    # Magnitude of the real FFT, computed with a fixed transform length.
    self.spectrum = np.abs(np.fft.rfft(self.data, n=21654))
def get_input_processed_pair(model, file_info, batch, total_batches):
    """Build framed clean/distorted arrays for one batch of a file pair.

    Args:
        model: Object providing ``frame_size`` and ``R`` (the number of
            samples prepended to the clean signal as look-back).
        file_info: Object providing ``directory`` (searched for a
            ``*_clean.wav`` file) and ``global_path`` (the distorted file).
        batch: Index of the batch to extract.
        total_batches: Number of batches the file is divided into.

    Returns:
        Tuple ``(x, y)`` of float32 views: ``x`` has shape
        ``(frames, frame_size + R, channels)`` and ``y`` has shape
        ``(frames, frame_size, channels)``.

    Raises:
        FileNotFoundError: If the directory contains no ``*_clean.wav``.
    """
    # Find the clean file in the file's parent directory. When several
    # entries match, the last one listed wins.
    clean_path = None
    for candidate in os.listdir(file_info.directory):
        if candidate.endswith("_clean.wav"):
            clean_path = os.path.join(file_info.directory, candidate)
    if clean_path is None:
        raise FileNotFoundError(
            "No '*_clean.wav' file found in {}".format(file_info.directory))

    # Load both the clean and distorted files.
    clean_wav = wavio.read(clean_path)
    # Full-scale magnitude for the clean file's bit depth, used to normalize.
    bitdepth_divisor = float(2 ** (clean_wav.sampwidth * 8 - 1))
    x = clean_wav.data
    y = wavio.read(file_info.global_path).data
    assert (x.shape[1] == y.shape[1]), "Clean file has different number of channels than non-clean file."
    num_channels = x.shape[1]
    frame_size = model.frame_size
    lookback = model.R

    # Pad the data.
    # NOTE(review): this appends ``length % frame_size`` zeros, which does
    # not round the length up to a multiple of frame_size - confirm intent.
    x = np.pad(x, ((0, x.shape[0] % frame_size), (0, 0)),
               'constant', constant_values=(0, 0))
    y = np.pad(y, ((0, y.shape[0] % frame_size), (0, 0)),
               'constant', constant_values=(0, 0))
    # Pad clean beginning to match receptive field.
    x = np.pad(x, ((lookback, 0), (0, 0)), 'constant', constant_values=(0, 0))

    # Take the batch. Use the distorted data shape since it hasn't been
    # padded with the receptive field.
    start = int((batch / total_batches) * y.shape[0])
    end = int(((batch + 1.0) / total_batches) * y.shape[0])
    y = y[start:end, ...]
    x = x[start:end + lookback, ...]

    # Normalize to [-1.0, 1.0]; the C-ordered float32 copies make the
    # strides below well defined.
    y = y.astype(np.float32, order='C') / bitdepth_divisor
    x = x.astype(np.float32, order='C') / bitdepth_divisor

    # Strides are expressed in bytes; derive the sample size from the array
    # instead of hard-coding 4.
    sample_bytes = x.itemsize
    frame_strides = (frame_size * sample_bytes * num_channels,
                     sample_bytes * num_channels,
                     sample_bytes)

    # Dimension 0: number of whole frames after removing the look-back.
    # Dimension 1: frame_size + look-back samples (overlapping windows).
    # Dimension 2: channels.
    new_x_shape = (int((x.shape[0] - lookback) / frame_size),
                   frame_size + lookback,
                   num_channels)
    x = np.lib.stride_tricks.as_strided(x, new_x_shape, frame_strides)

    # Dimension 0: number of whole frames in y.
    # Dimension 1: frame_size samples.
    # Dimension 2: channels.
    new_y_shape = (int(y.shape[0] / frame_size), frame_size, num_channels)
    y = np.lib.stride_tricks.as_strided(y, new_y_shape, frame_strides)
    return x, y
def extract(filename):
    """Return the MFCC features of the given file."""
    wav = wavio.read(filename)
    return MFCC.extract(wav.rate, wav.data)
def get_audio(audio_path, is_crop=True):
    """Compute log mel filter-bank features for an audio file.

    The file is read with ``wavio``; if that raises ``TypeError`` the
    ``wav.read`` reader is used instead. The mean is removed from the signal
    before ``logfbank`` is applied with 40 filters. With ``is_crop`` set,
    the result is zero-padded or truncated to exactly 1024 rows.
    """
    try:
        wave_obj = wavio.read(audio_path)
    except TypeError:
        rate, sig = wav.read(audio_path)
    else:
        rate = wave_obj.rate
        sig = np.squeeze(wave_obj.data)

    # Mel-filter bank on the zero-mean signal.
    centered = sig - np.mean(sig)
    fbank_feat = logfbank(centered, rate, winlen=0.025, winstep=0.01,
                          nfilt=40, nfft=512, lowfreq=0, highfreq=None,
                          preemph=0.97)
    if not is_crop:
        return fbank_feat

    n_rows = fbank_feat.shape[0]
    if n_rows >= 1024:
        return fbank_feat[:1024]
    zero_pad = np.zeros((1024 - n_rows, 40))
    return np.concatenate([fbank_feat, zero_pad], 0)
def showNoiseEnergies():
    """Plot wavelet node energies for every noise example WAV file.

    Walks 'Sound Files/Noise examples/Noise_10s', and for each ``.wav`` file
    plots the energies returned by
    ``WaveletSegment.computeWaveletEnergy_1s`` for all nodes 0..62 except
    the ones listed in ``tbd``, one figure per file.
    """
    import pylab as pl
    import SignalProc
    pl.ion()
    tbd = [0, 1, 3, 7, 15, 31]
    listnodes = np.arange(63)
    listnodes = np.delete(listnodes, tbd)
    for root, dirs, files in os.walk(str('Sound Files/Noise examples/Noise_10s')):
        for filename in files:
            if filename.endswith('.wav'):
                filename = root + '/' + filename
                wavobj = wavio.read(filename)
                sampleRate = wavobj.rate
                data = wavobj.data
                # Compare the dtype by value: ``is not 'float'`` tests
                # identity against a string literal and is always true.
                if data.dtype != 'float':
                    data = data.astype('float')  # / 32768.0
                # Keep only the first channel of multi-channel data.
                if data.ndim > 1:
                    data = np.squeeze(data[:, 0])
                pl.figure()
                e1 = WaveletSegment.computeWaveletEnergy_1s(data, 'dmey2')
                pl.plot(e1[listnodes])
                pl.title(filename)
def testFeatures():
    """Exercise the ``Features`` measurements on 'Sound Files/tril1.wav'.

    Builds a Hann-window spectrogram (width 256, increment 128) of the first
    channel and calls the Raven measurement helpers and the remaining
    feature extractors once each. Nothing is returned.
    """
    import wavio
    wavobj = wavio.read('Sound Files/tril1.wav')
    fs = wavobj.rate
    data = wavobj.data
    # Compare the dtype by value: ``is not 'float'`` tests identity against
    # a string literal and is always true.
    if data.dtype != 'float':
        data = data.astype('float')  # / 32768.0
    # Keep only the first channel of multi-channel data.
    if data.ndim > 1:
        data = data[:, 0]

    sp = SignalProc.SignalProc(sampleRate=fs, window_width=256, incr=128)
    sg = sp.spectrogram(data, multitaper=False, window_width=256, incr=128, window='Hann')

    f = Features(data, fs, 256, 128)
    features = []
    # Loop over the segments (and time slices within?)
    features.append([
        f.get_Raven_spectrogram_measurements(sg=sg, fs=fs, window_width=256, f1=0, f2=np.shape(sg)[1], t1=0, t2=np.shape(sg)[0]),
        f.get_Raven_robust_measurements(sg, fs, 0, np.shape(sg)[1], 0, np.shape(sg)[0]),
        f.get_Raven_waveform_measurements(data, fs, 0, len(data)),
        f.wiener_entropy(sg),
    ])

    # Will need to think about feature vector length for the librosa
    # features, since they are on fixed windows.
    f.get_chroma()
    f.get_mfcc()
    f.get_tonnetz()
    f.get_spectral_features()
    f.get_lpc(data, order=44)
def read(file, dtype='float'):
    """Read a 16-bit WAV file and return its first two channels.

    Asserts that the sample width is 2 bytes and that the rate equals
    ``constants.FRAMERATE``.

    Returns:
        Tuple ``(left, right)`` of arrays converted to ``dtype``; ``right``
        is ``None`` when the file has a single channel.
    """
    wav = wavio.read(file)
    assert wav.sampwidth == 2 and wav.rate == constants.FRAMERATE
    samples = wav.data
    left = samples[:, 0].astype(dtype)
    if samples.shape[1] > 1:
        right = samples[:, 1].astype(dtype)
    else:
        right = None
    return left, right
def loadData(self, fName, trainTest=True):
    """Load ``<fName>.wav`` and, optionally, its per-second annotation.

    Stores the sample rate in ``self.sampleRate`` and the first channel as
    float samples in ``self.data``. When ``trainTest`` is true, the second
    column of the tab-separated ``<fName>-sec.txt`` is read into
    ``self.annotation``, one integer per second of audio.

    Raises:
        Exception: Whatever ``wavio.read`` raised, after reporting the file
            name (previously the error was swallowed and the method failed
            later on an unbound variable).
    """
    filename = fName + '.wav'  # e.g. 'train/kiwi/train1.wav'
    filenameAnnotation = fName + '-sec.txt'  # e.g. 'train/kiwi/train1-sec.txt'
    try:
        wavobj = wavio.read(filename)
    except Exception:
        print("unsupported file: ", filename)
        raise
    self.sampleRate = wavobj.rate
    self.data = wavobj.data
    # Compare the dtype by value: ``is not 'float'`` tests identity against
    # a string literal and is always true.
    if self.data.dtype != 'float':
        self.data = self.data.astype('float')  # / 32768.0
    # Keep only the first channel of multi-channel data.
    if self.data.ndim > 1:
        self.data = np.squeeze(self.data[:, 0])
    # Number of (possibly partial) seconds in the recording.
    n = int(np.ceil(len(self.data) / self.sampleRate))

    if trainTest:  # survey data don't have annotations
        # Get the segmentation from the txt file.
        import csv
        self.annotation = []
        with open(filenameAnnotation) as f:
            d = list(csv.reader(f, delimiter="\t"))
        for row in range(n):
            self.annotation.append(int(d[row][1]))
def testFeatures():
    """Exercise the ``Features`` measurements on a fixed kiwi recording.

    Builds a 'Standard' Hann-window spectrogram (width 256, increment 128)
    of the first channel and calls the MFCC, wavelet-energy and Raven
    measurement helpers plus the remaining feature extractors once each.
    Nothing is returned.
    """
    # Raw string: the Windows path contains backslash sequences that are
    # not valid escapes.
    wavobj = wavio.read(r'D:\AviaNZ\Sound_Files\Denoising_paper_data\Primary_dataset\kiwi\male\male1.wav')
    fs = wavobj.rate
    data = wavobj.data
    # Compare the dtype by value: ``is not 'float'`` tests identity against
    # a string literal and is always true.
    if data.dtype != 'float':
        data = data.astype('float')  # / 32768.0
    # Keep only the first channel of multi-channel data.
    if data.ndim > 1:
        data = data[:, 0]

    sp = SignalProc.SignalProc(sampleRate=fs, window_width=256, incr=128)
    sg = sp.spectrogram(data, sgType='Standard', window_width=256, incr=128, window='Hann')

    f = Features(data, fs, 256, 128)
    features = []
    # Loop over the segments (and time slices within?)
    mfcc = f.get_mfcc().tolist()
    we = f.get_WE()
    we = we.transpose().tolist()
    # How to combine features with different resolution?
    features.append([
        f.get_Raven_spectrogram_measurements(sg=sg, fs=fs, window_width=256, f1=0, f2=np.shape(sg)[1], t1=0, t2=np.shape(sg)[0]),
        f.get_Raven_robust_measurements(sg, fs, 0, np.shape(sg)[1], 0, np.shape(sg)[0]),
        f.get_Raven_waveform_measurements(data, fs, 0, len(data)),
        f.wiener_entropy(sg),
    ])

    # Will need to think about feature vector length for the librosa
    # features, since they are on fixed windows.
    f.get_chroma()
    f.get_mfcc()
    f.get_tonnetz()
    f.get_spectral_features()
    f.get_lpc(data, order=44)
def read(filename, limit=None):
    """Read an audio file into per-channel int16 sample arrays.

    The file is decoded with pydub; if that raises ``audioop.error`` the
    file is read with ``wavio`` instead.

    Args:
        filename: Path of the audio file.
        limit: Optional number of seconds to keep from the start of the
            file. Falsy values keep the whole file.

    Returns:
        Tuple ``(channels, frame_rate, file_hash)`` where ``channels`` is a
        list with one 1-D sample array per channel and ``file_hash`` is
        ``unique_hash(filename)``.
    """
    try:
        audiofile = AudioSegment.from_file(filename)
        if limit:
            # pydub segments are sliced in milliseconds.
            audiofile = audiofile[:limit * 1000]
        # frombuffer replaces the binary mode of np.fromstring; the copy
        # keeps the result writable, as it was before.
        data = np.frombuffer(audiofile._data, np.int16).copy()
        n_channels = audiofile.channels
        # Samples are interleaved, so channel k is every n-th sample from k.
        channels = [data[chn::n_channels] for chn in range(n_channels)]
        frame_rate = audiofile.frame_rate
    except audioop.error:
        wav = wavio.read(filename)
        frame_rate = wav.rate
        samples = wav.data
        if limit:
            # wavio data is indexed in frames, so seconds convert through
            # the sample rate (not the millisecond factor used for pydub).
            samples = samples[:int(limit * frame_rate)]
        samples = samples.T.astype(np.int16)
        channels = list(samples)
    return channels, frame_rate, unique_hash(filename)
def read_wav(filename):
    """
    Read a wav file and return its header values and sample data.

    Parameters
    ----------
    filename : string
        The name of the wav file you want to read

    Returns
    -------
    list
        ``[rate, length, resolution, nc, x]`` where ``rate`` is the
        sampling rate, ``length`` the number of samples in each channel,
        ``resolution`` the number of bytes per sample, ``nc`` the number of
        sound channels and ``x`` the [length, nc]-dimension data array.
    """
    wav = wavio.read(filename)
    samples = wav.data
    n_frames, n_channels = samples.shape[0], samples.shape[1]
    return [wav.rate, n_frames, wav.sampwidth, n_channels, samples]
def test4(self):
    """Write int16 data with sampwidth=1 and check the file read back.

    Verifies the WAV header through the ``wave`` module and the rate,
    sample width, dtype, shape and values reported by ``wavio.read``. The
    temporary file and directory are removed afterwards.
    """
    path = tempfile.mkdtemp()
    filename = os.path.join(path, "test4data.wav")
    data = np.zeros(32, dtype=np.int16)
    data[1::4] = 10000
    data[3::4] = -10000

    wavio.write(filename, data, 44100, sampwidth=1)
    try:
        f = wave.open(filename, 'r')
        try:
            self.assertEqual(f.getnchannels(), 1)
            self.assertEqual(f.getsampwidth(), 1)
            self.assertEqual(f.getframerate(), 44100)
        finally:
            # Close even when an assertion fails so the file can still be
            # removed below.
            f.close()

        w = wavio.read(filename)
        self.assertEqual(w.rate, 44100)
        self.assertEqual(w.sampwidth, 1)
        self.assertEqual(w.data.dtype, np.uint8)
        self.assertEqual(w.data.shape, (32, 1))
        # Expected 8-bit values: 128 where the input was 0, 255 and 0 where
        # it was +10000 and -10000.
        expected = 128*np.ones_like(data, dtype=np.uint8).reshape(-1, 1)
        expected[1::4, 0] = 255
        expected[3::4, 0] = 0
        np.testing.assert_equal(w.data, expected)
    finally:
        os.remove(filename)
        os.removedirs(path)
def open_wavefile(filename, target_rms=.01):
    """Read a WAV file and rescale every channel to a target RMS level.

    Args:
        filename: Path of the WAV file.
        target_rms: RMS level each channel is scaled to.

    Returns:
        Tuple ``(x, fs, num_frames)``: the scaled float samples, the sample
        rate and the number of frames.

    The process exits with status 1 when the file cannot be read.
    """
    print("READING: " + filename)
    try:
        wav = wavio.read(filename)
    except Exception:
        print(traceback.format_exc())
        print("ERROR: could not read file")
        sys.exit(1)
    fs, wavefile = wav.rate, wav.data

    x = np.array(wavefile, dtype=float)
    x = x * (2 ** -15)  # normalizing to match MATLAB double representation

    def _to_target(samples):
        """Scale one channel to target_rms and add a tiny random offset."""
        rms = np.sqrt(np.mean(np.square(samples)))
        # An all-zero channel has rms == 0; dividing would fill it with
        # NaNs, so such a channel only receives the noise term.
        scaled = samples / rms * target_rms if rms > 0 else samples
        # adding noise for files with fake zero data
        return scaled + np.random.rand(samples.shape[0]) * 1e-20

    num_chan = 1
    if x.ndim > 1:
        num_chan = x.shape[1]
        for c in range(num_chan):
            x[:, c] = _to_target(x[:, c])
    else:
        x = _to_target(x)

    num_frames = x.shape[0]
    # A single pre-joined string prints identically as a Python 2 print
    # statement and as a Python 3 print call.
    print(" ".join(str(part) for part in (
        "\tsample rate: ", fs, "\n\t# samples: ", num_frames,
        "\n\t# channels: ", num_chan)))
    return x, fs, num_frames
def open_wavefile(filename, target_rms=.01):
    """Read a WAV file and rescale every channel to a target RMS level.

    Args:
        filename: Path of the WAV file.
        target_rms: RMS level each channel is scaled to.

    Returns:
        Tuple ``(x, fs, num_frames)``: the scaled float samples, the sample
        rate and the number of frames.

    The process exits with status 1 when the file cannot be read.
    """
    print("READING: " + filename)
    try:
        wav = wavio.read(filename)
    except Exception:
        print(traceback.format_exc())
        print("ERROR: could not read file")
        sys.exit(1)
    fs, wavefile = wav.rate, wav.data

    x = np.array(wavefile, dtype=float)
    x = x * (2 ** -15)  # normalizing to match MATLAB double representation

    def _to_target(samples):
        """Scale one channel to target_rms and add a tiny random offset."""
        rms = np.sqrt(np.mean(np.square(samples)))
        # An all-zero channel has rms == 0; dividing would fill it with
        # NaNs, so such a channel only receives the noise term.
        scaled = samples / rms * target_rms if rms > 0 else samples
        # adding noise for files with fake zero data
        return scaled + np.random.rand(samples.shape[0]) * 1e-20

    num_chan = 1
    if x.ndim > 1:
        num_chan = x.shape[1]
        for c in range(num_chan):
            x[:, c] = _to_target(x[:, c])
    else:
        x = _to_target(x)

    num_frames = x.shape[0]
    # A single pre-joined string prints identically as a Python 2 print
    # statement and as a Python 3 print call.
    print(" ".join(str(part) for part in (
        "\tsample rate: ", fs, "\n\t# samples: ", num_frames,
        "\n\t# channels: ", num_chan)))
    return x, fs, num_frames
def __init__(self, file=""):
    """Read the audio file and keep its header values and sample data.

    With the default empty ``file`` only ``fileName`` is set.
    """
    self.fileName = file
    if file == "":
        return

    wav = wavio.read(file)
    self.wav = wav
    # Frame rate.
    self.fs = wav.rate
    # Number of bytes per sample.
    self.bytes = wav.sampwidth
    # Track data, and the same data converted by int_to_float.
    self.data = np.array(wav.data)
    self.floatData = int_to_float(self.data, self.bytes)
    # Data dimensions are (nSamples, nChannels); nChannels is 1 for mono
    # and 2 for stereo.
    n_samples, n_channels = np.shape(self.data)
    self.nSamples = n_samples
    self.nChannels = n_channels
    # Length of the file in seconds.
    self.length = n_samples / self.fs
def ExtractCompressedAudioNTS(fileName):
    """Decode an audio file to mono float32 samples at 48 kHz using ffmpeg.

    Not thread safe: fixed paths under ``temp/`` are used for the
    intermediate files. The file is first converted to ``temp/temp.wav``
    (which is read once with ``wavio`` so a failed conversion raises here),
    then to raw little-endian float32 in ``temp/temp.raw``.

    Returns:
        Tuple ``(data, sampleRate)``: the float32 samples and 48000.
    """
    import subprocess

    os.makedirs('temp', exist_ok=True)
    tempWav = 'temp/temp.wav'
    tempRaw = 'temp/temp.raw'
    sampleRate = 48000

    # Commands run as argument lists, without a shell, so quotes or shell
    # metacharacters in fileName cannot alter them. The printed text is the
    # same command line as before, kept for the log.
    print('%s -y -i "%s" -ac 1 -ar %d %s' % (
        ffmpegEXE, fileName, sampleRate, tempWav))
    sys.stdout.flush()
    subprocess.call([ffmpegEXE, '-y', '-i', fileName,
                     '-ac', '1', '-ar', str(sampleRate), tempWav])
    # Result unused; the read only checks that a readable WAV was produced.
    wavio.read(tempWav)

    print('%s -y -i "%s" -f f32le -c:a pcm_f32le -ac 1 -ar %d %s' % (
        ffmpegEXE, fileName, sampleRate, tempRaw))
    sys.stdout.flush()
    subprocess.call([ffmpegEXE, '-y', '-i', fileName,
                     '-f', 'f32le', '-c:a', 'pcm_f32le',
                     '-ac', '1', '-ar', str(sampleRate), tempRaw])

    data = numpy.fromfile(tempRaw, dtype=numpy.dtype('<f'))
    return (data, sampleRate)
def create_dataset(src_path, esc50_dst_path, esc10_dst_path):
    """Build per-fold sound/label archives for ESC-50 and its ESC-10 subset.

    For folds 1..5, every ``<fold>-*.wav`` file in ``src_path`` is read,
    the first channel is trimmed to the span between its first and last
    non-zero samples, and the label is taken from the last ``-`` separated
    field of the file name. Results are saved with ``np.savez``.
    """
    print('* {} -> {}'.format(src_path, esc50_dst_path))
    print('* {} -> {}'.format(src_path, esc10_dst_path))
    esc10_classes = [0, 10, 11, 20, 38, 21, 40, 41, 1, 12]  # ESC-10 is a subset of ESC-50
    esc50_dataset = {}
    esc10_dataset = {}

    for fold in range(1, 6):
        fold_key = 'fold{}'.format(fold)
        esc50_dataset[fold_key] = {}
        esc10_dataset[fold_key] = {}
        esc50_sounds, esc50_labels = [], []
        esc10_sounds, esc10_labels = [], []

        # Load the wav files of this fold.
        pattern = os.path.join(src_path, '{}-*.wav'.format(fold))
        for wav_file in sorted(glob.glob(pattern)):
            sound = wavio.read(wav_file).data.T[0]
            nonzero_idx = sound.nonzero()[0]
            first, last = nonzero_idx.min(), nonzero_idx.max()
            sound = sound[first: last + 1]  # Remove silent sections
            label = int(os.path.splitext(wav_file)[0].split('-')[-1])
            esc50_sounds.append(sound)
            esc50_labels.append(label)
            if label in esc10_classes:
                esc10_sounds.append(sound)
                esc10_labels.append(esc10_classes.index(label))

        esc50_dataset[fold_key]['sounds'] = esc50_sounds
        esc50_dataset[fold_key]['labels'] = esc50_labels
        esc10_dataset[fold_key]['sounds'] = esc10_sounds
        esc10_dataset[fold_key]['labels'] = esc10_labels

    # Saved layout: dict -> array -> dict -> array.
    np.savez(esc50_dst_path, **esc50_dataset)
    np.savez(esc10_dst_path, **esc10_dataset)
def get_new_map(self, filename):
    """Read a WAV file and store its first two channels on the instance."""
    self.wave = wavio.read(filename)
    samples = self.wave.data
    self.left_channel = samples[:, 0]
    self.right_channel = samples[:, 1]
def load_data(wave_path):
    """Load a wave file and return a peak-normalised, downsampled waveform.

    The samples are divided by their peak magnitude, reduced to one channel,
    low-pass filtered and decimated by 3, then converted to int16.

    Returns:
        Tuple ``(waveform, fs)``: ``waveform`` is reshaped to
        ``(1, n_samples)`` when one-dimensional; ``fs`` is the sample rate
        reported by the file (it is not adjusted for the decimation).
    """
    def normalize(x):
        m = np.max(np.abs(x))
        return x / m

    def tomono(x):
        return (x[:, 0] + x[:, 1]) / 2

    def downsample3(sig, Nwin=32):
        # FIR low-pass, then keep every third sample starting at index 2.
        win = firwin(numtaps=Nwin, cutoff=0.55)
        return np.convolve(sig, win, 'same')[2::3]

    def toint16(x):
        # Clip to the int16 range before casting: a value of +1.0 scales to
        # 32768, which does not fit and would otherwise wrap around.
        scaled = np.clip(x * (2 ** 15), -2 ** 15, 2 ** 15 - 1)
        return np.int16(scaled)

    # Load data
    wavobj = read(wave_path)
    fs = wavobj.rate

    # Preprocess data
    waveform = wavobj.data.copy()
    waveform = normalize(waveform)
    # NOTE(review): ``type`` is not a parameter or local here; unless the
    # module defines a global of that name this is the builtin, which never
    # equals 'solo', so the mono mix-down always runs - confirm intent.
    if type == 'solo':
        waveform = waveform[:, 0]
    else:
        waveform = tomono(waveform)
    waveform = downsample3(waveform)
    waveform = toint16(waveform)

    if len(waveform.shape) == 1:
        waveform = np.reshape(waveform, [1, len(waveform)])

    return waveform, fs
def loadFile(filename):
    """Load stored segments and audio for a file that has annotations.

    Segments are read from the JSON file ``<filename>.data``; a leading
    entry whose first element is -1 is dropped. The audio is then read and
    reduced to its first channel as float samples.

    Returns:
        Tuple ``(segments, audiodata, sampleRate, minFreq, maxFreq,
        datalengthSec)``. ``(None, None, 0, 0, 0, 0)`` is returned when
        there is no ``.data`` file, when it holds no segments, or when the
        audio file is empty.
    """
    no_data = (None, None, 0, 0, 0, 0)

    # Load any previous segments stored.
    if not os.path.isfile(filename + '.data'):
        return no_data
    with open(filename + '.data', 'r') as file:
        segments = json.load(file)
    if len(segments) == 0:
        return no_data
    if segments[0][0] == -1:
        del segments[0]

    # Avoid files with no data (Tier 1 has 0Kb .wavs); report them the same
    # way as files without segments instead of failing on unset values.
    if os.stat(filename).st_size == 0:
        return no_data

    wavobj = wavio.read(filename)
    # Parse wav format details based on file header:
    sampleRate = wavobj.rate
    audiodata = wavobj.data
    minFreq = 0
    maxFreq = sampleRate / 2.
    # Compare the dtype by value: ``is not 'float'`` tests identity against
    # a string literal and is always true.
    if audiodata.dtype != 'float':
        audiodata = audiodata.astype('float')  # / 32768.0
    # Keep only the first channel of multi-channel data.
    if audiodata.ndim > 1:
        audiodata = audiodata[:, 0]
    datalength = np.shape(audiodata)[0]
    datalengthSec = datalength / sampleRate

    return segments, audiodata, sampleRate, minFreq, maxFreq, datalengthSec
def read_audio(filename):
    """Return the peak-normalised first channel of a WAV file and its rate.

    The sample rate is also printed.
    """
    wav = wavio.read(filename)
    first_channel = wav.data[:, 0]
    peak = np.max(np.abs(first_channel), axis=0)
    input_audio = first_channel / peak
    fs = wav.rate
    print("Frecuencia de muestreo", fs)
    return input_audio, fs
def main(tempo, pattern):
    """Play one measure built from the metronome click and silences.

    Args:
        tempo: Beats per minute; one beat lasts ``60 / tempo`` seconds.
        pattern: Dot-separated symbols, ``'1'`` for a click and ``'0'`` for
            a silent beat, e.g. ``"1.0.1.0"``.

    Raises:
        ValueError: If ``pattern`` contains any other symbol. Such symbols
            used to repeat the previous note, or fail with a NameError when
            they came first.
    """
    wav = wavio.read('metronome.wav')
    sample_rate = wav.rate
    beat_duration = 60 / tempo
    quarter_note = int(sample_rate * beat_duration)

    # One beat worth of the click sample, and silence of the same shape.
    audio = wav.data[:quarter_note]
    click = audio.astype(np.int16)
    silence = np.zeros_like(audio).astype(np.int16)

    notes = []
    for symbol in pattern.split('.'):
        if symbol == '0':
            notes.append(silence)
        elif symbol == '1':
            notes.append(click)
        else:
            raise ValueError(
                "Unknown pattern symbol {!r}; expected '0' or '1'".format(symbol))

    measure = np.concatenate(notes)
    sd.play(measure, sample_rate)
def get_status(self, seconds=20, pps=100):
    """Describe the recording file and a down-sampled tail of its waveform.

    Args:
        seconds: Length of the tail of the recording to summarise.
        pps: Number of waveform points per second.

    Returns:
        Dict with ``record_length`` (milliseconds), ``name``, ``filename``,
        ``waveform`` (chunk averages of the first channel divided by half
        of ``2**self.bitdepth``) and ``times``; or a dict holding only an
        ``error`` message when the file does not exist.
    """
    status = {}
    if not os.path.exists(self.file_name):
        status.update({"error": "file doesn't exist (yet)"})
        return status

    wav = wavio.read(self.file_name)
    rate = wav.rate
    data = wav.data
    status.update({
        "record_length": (len(data) / rate) * 1000,
        "name": self.name,
        "filename": self.file_name
    })

    points = seconds * pps
    samples = int(seconds * rate)
    data0 = data[:, 0][-samples:]
    parts = np.array_split(data0, points)
    # array_split yields chunks of unequal length whenever the sample count
    # is not a multiple of ``points``, so average each chunk on its own
    # rather than along an axis of a (possibly ragged) 2-D array. Empty
    # chunks (fewer samples than points) contribute 0.0.
    averages = np.array(
        [np.average(part) if part.size else 0.0 for part in parts])
    waveform_points = list(averages / ((2**self.bitdepth) / 2))
    waveform_times = list(np.arange(seconds, 0, -(seconds / points)))
    status.update({
        "waveform": waveform_points,
        "times": waveform_times
    })
    return status
def readwav(path):
    """Read a WAV file and return its scaled samples and sample rate.

    ``wavio.read`` returns an object holding the sample rate, the sample
    width (in bytes) and a numpy array with the data. The data is converted
    to float and divided by ``2 ** (sampwidth * 8 - 1)``.
    """
    wav_obj = wavio.read(path)
    # np.power(a, b) computes a raised to the power b.
    full_scale = np.power(2, wav_obj.sampwidth * 8 - 1)
    samples = wav_obj.data.astype(float) / full_scale
    return samples, wav_obj.rate
def __getitem__(self, index):
    """Generate one sample of data.

    Reads the WAV file for ``self.list_IDs[index]`` from the directory that
    corresponds to its label, z-scores each channel, optionally selects
    ``self.t_range`` along the time axis, and returns the transposed data
    as a float tensor together with the label as a long tensor.
    """
    # Select sample.
    sample_id = self.list_IDs[index]
    y = self.labels[index]
    assert y <= self.labels.max()

    # Label 0 is stored in the atrial fibrillation folder, every other
    # label in the sinus rhythm folder.
    if y == 0:
        main_path = '/vol/hinkelstn/data/FILTERED/atrial_fibrillation_8k/'
    else:
        main_path = '/vol/hinkelstn/data/FILTERED/sinus_rhythm_8k/'

    w = wavio.read(main_path + sample_id)
    w_zm = stats.zscore(w.data, axis=0, ddof=1)

    selected = w_zm[self.t_range, :] if self.t_range else w_zm
    X = torch.tensor(selected.transpose(1, 0)).float()
    y = torch.tensor(y).long()
    return X, y
def testMC():
    """Show the yin segmentation of 'Sound Files/tril1.wav' in a Qt window.

    The window stacks the spectrogram, a second copy of the spectrogram
    overlaid with the segments returned by ``Segment.yin``, and the raw
    waveform. The call blocks in the Qt event loop.
    """
    import wavio
    import pyqtgraph as pg
    from pyqtgraph.Qt import QtCore, QtGui

    wavobj = wavio.read('Sound Files/tril1.wav')
    fs = wavobj.rate
    data = wavobj.data
    # Compare the dtype by value: ``is not 'float'`` tests identity against
    # a string literal and is always true.
    if data.dtype != 'float':
        data = data.astype('float')  # / 32768.0
    # Keep only the first channel of multi-channel data.
    if data.ndim > 1:
        data = data[:, 0]

    import SignalProc
    sp = SignalProc.SignalProc(data, fs, 256, 128)
    sg = sp.spectrogram(data=data, window_width=256, incr=128, window='Hann', mean_normalise=True, onesided=True, multitaper=False, need_even=False)
    s = Segment(data, sg, sp, fs)
    s1, p, t = s.yin(returnSegs=True)

    app = QtGui.QApplication([])

    mw = QtGui.QMainWindow()
    mw.show()
    mw.resize(800, 600)

    win = pg.GraphicsLayoutWidget()
    mw.setCentralWidget(win)

    # Row 0: spectrogram in dB.
    vb1 = win.addViewBox(enableMouse=False, enableMenu=False, row=0, col=0)
    im1 = pg.ImageItem(enableMouse=False)
    vb1.addItem(im1)
    im1.setImage(10.*np.log10(sg))

    # Row 1: spectrogram again, used as background for the segments.
    vb3 = win.addViewBox(enableMouse=False, enableMenu=False, row=1, col=0)
    im3 = pg.ImageItem(enableMouse=False)
    vb3.addItem(im3)
    im3.setImage(10.*np.log10(sg))

    # Row 2: raw waveform.
    vb4 = win.addViewBox(enableMouse=False, enableMenu=False, row=2, col=0)
    im4 = pg.PlotDataItem(enableMouse=False)
    vb4.addItem(im4)
    im4.setData(data)

    for seg in s1:
        a = pg.LinearRegionItem()
        a.setRegion([convertAmpltoSpec(seg[0], fs, 128), convertAmpltoSpec(seg[1], fs, 128)])
        vb3.addItem(a, ignoreBounds=True)

    QtGui.QApplication.instance().exec_()
def showNoiseEnergies():
    """Plot wavelet node energies for every noise example WAV file.

    Walks 'Sound Files/Noise examples/Noise_10s', and for each ``.wav`` file
    plots the energies returned by
    ``WaveletSegment.computeWaveletEnergy_1s`` for all nodes 0..62 except
    the ones listed in ``tbd``, one figure per file.
    """
    import pylab as pl
    import SignalProc
    pl.ion()
    tbd = [0, 1, 3, 7, 15, 31]
    listnodes = np.arange(63)
    listnodes = np.delete(listnodes, tbd)
    for root, dirs, files in os.walk(str('Sound Files/Noise examples/Noise_10s')):
        for filename in files:
            if filename.endswith('.wav'):
                filename = root + '/' + filename
                wavobj = wavio.read(filename)
                sampleRate = wavobj.rate
                data = wavobj.data
                # Compare the dtype by value: ``is not 'float'`` tests
                # identity against a string literal and is always true.
                if data.dtype != 'float':
                    data = data.astype('float')  # / 32768.0
                # Keep only the first channel of multi-channel data.
                if data.ndim > 1:
                    data = np.squeeze(data[:, 0])
                pl.figure()
                e1 = WaveletSegment.computeWaveletEnergy_1s(data, 'dmey2')
                pl.plot(e1[listnodes])
                pl.title(filename)
def wav2features(wav_file: str, parameters: dict, normalize: bool = True, allow_zero_padding: bool = False):
    '''
    Extract audio features from a wave file.

    Keyword Parameters:
    ===================
    - wav_file (str) -- wave file path
    - parameters (dict) -- audio and feature parameters used to extract features
    - normalize (bool) -- scale the int16 signal to floats before processing (default True)
    - allow_zero_padding (bool) -- prepend zero rows if the extracted features are too short (default False)

    Returns
    =======
    features (np.array) -- at most parameters['shape'][0] feature rows, or
    None when the file yields fewer rows and zero padding is not allowed

    ### Raises
    - ValueError -- unsupported feature type, sample data type or sample rate
    - FileNotFoundError -- the file could not be opened or read
    '''
    output_shape = parameters['shape']
    features_param = parameters['features_param']
    if features_param['feature_type'] not in supported_features:
        raise ValueError(
            "Unsupported feature type {}, supported features are: {}".format(
                features_param['feature_type'], supported_features))
    try:
        with open(wav_file, 'rb') as fp:
            wav = wavio.read(fp)
    except Exception as err:
        # Same exception type as before, now chained to the real cause and
        # no longer intercepting KeyboardInterrupt/SystemExit.
        raise FileNotFoundError("File not found") from err
    if wav.data.dtype != np.int16:
        raise ValueError('Unsupported data type: ' + str(wav.data.dtype))
    if wav.rate != parameters['sample_rate']:
        raise ValueError('Unsupported sample rate: ' + str(wav.rate))

    signal = np.squeeze(wav.data)
    if 'emphasis' in parameters and parameters['emphasis'] is not None:
        signal = pre_emphasis(signal, parameters['emphasis'])
    if normalize:
        # Divide by the magnitude of the smallest int16 value (32768).
        signal = signal.astype(np.float32) / abs(float(np.iinfo(np.int16).min))
    features = signal2features(signal, parameters, features_param)

    # Check output length.
    if len(features) < output_shape[0]:
        if allow_zero_padding:
            # Missing rows are added as zeros in front of the features.
            features = np.vstack((np.zeros((output_shape[0] - len(features), len(features[0]))), features))
        else:
            return None
    if len(features) > output_shape[0]:
        features = features[:output_shape[0]]
    return features
def check_wavio_read(self, filename, rate, sampwidth, dtype, shape, data):
    """Assert that ``wavio.read(filename)`` yields the expected values."""
    result = wavio.read(filename)
    samples = result.data
    self.assertEqual(result.rate, rate)
    self.assertEqual(result.sampwidth, sampwidth)
    self.assertEqual(samples.dtype, dtype)
    self.assertEqual(samples.shape, shape)
    np.testing.assert_equal(samples, data)
def __getitem__(self, index):
    """Return the augmented sound and the label for row ``index``.

    The first channel of the file named in column 0 is trimmed to the span
    between its first and last non-zero samples, passed through the
    instance's crop/gain (and, outside test mode, pitch/speed/noise)
    steps, then padded and normalised. The label comes from column 1.
    """
    row = self.df.iloc[[index]]
    sound = wavio.read(self.audio_path + str(row.iloc[0, 0])).data.T[0]
    nonzero_idx = sound.nonzero()[0]
    first, last = nonzero_idx.min(), nonzero_idx.max()
    sound = sound[first: last + 1]

    sound = self.random_crop(sound)
    sound = self.random_gain(sound)
    if not self.istest:
        for augment in (self.pitch_change, self.speed_change, self.noiser):
            sound = augment(sound)
    sound = self.pad(sound)
    sound = self.normalize(sound)

    class_index = row.iloc[0, 1]
    one_hot = np.zeros(self.classes)
    one_hot[class_index] = 1.0
    lab = np.array(row.iloc[0, 1])
    # The converted one-hot label is discarded; the plain class index
    # array is what gets returned.
    sound, one_hot = self.tensor(sound, one_hot)
    return (sound, lab)
def readWav(self, file, len=None, off=0, silent=False):
    """Read a WAV file into the instance and configure the audio format.

    Args are the same as for wavio.read: filename, length in seconds,
    offset in seconds. ``silent`` suppresses the format summary print.
    """
    wavobj = wavio.read(file, len, off)

    # Take only the left channel.
    samples = wavobj.data
    if samples.ndim > 1:
        samples = samples[:, 0]
    self.data = samples
    self.audioFormat.setChannelCount(1)

    # Force float type.
    if self.data.dtype != 'float':
        self.data = self.data.astype('float')
    self.audioFormat.setSampleSize(wavobj.sampwidth * 8)

    # Total file length read from the header (useful for paging).
    self.fileLength = wavobj.nframes

    self.sampleRate = wavobj.rate
    self.audioFormat.setSampleRate(self.sampleRate)

    # *Freq sets hard bounds, *Show can limit the spec display.
    self.minFreq = 0
    self.maxFreq = self.sampleRate // 2
    self.minFreqShow = max(self.minFreq, self.minFreqShow)
    self.maxFreqShow = min(self.maxFreq, self.maxFreqShow)

    if silent:
        return
    fmt = self.audioFormat
    print("Detected format: %d channels, %d Hz, %d bit samples" % (fmt.channelCount(), fmt.sampleRate(), fmt.sampleSize()))
def check_assertions(self, expectation: Expectation) -> None:
    """Simplify assertion checks for wave method.

    Generates the wave file described by ``expectation``, then checks that
    it exists, has the expected sample rate and length, and that each
    note's expected frequency bin is stronger than the average of that
    note's spectrum.
    """
    filename = expectation['filename']
    sample_rate = expectation['sample_rate']
    note_duration = expectation['note_duration']
    notes = expectation['notes']

    self.hash.wave(
        filename=filename,
        key=expectation['key'],
        note_duration=note_duration,
        sample_rate=sample_rate)
    self.assertTrue(os.path.isfile(filename), 'File not created.')

    wave_file = wavio.read(filename)
    self.assertEqual(
        wave_file.rate, sample_rate,
        'Sample rate of wave file does not match input rate.')

    samples_per_note = int(note_duration * sample_rate)
    self.assertEqual(
        len(wave_file.data), len(notes) * samples_per_note,
        'Output tune is not the correct length.')

    samples = numpy.squeeze(wave_file.data)
    for note, frequency in enumerate(notes):
        begin = note * samples_per_note
        finish = (note + 1) * samples_per_note
        spectrum = numpy.fft.fft(samples[begin:finish])
        # Keep the first half of the spectrum and express it in dB.
        half = spectrum[:int(spectrum.size/2)]
        spectral_density = 10 * numpy.log10(
            numpy.absolute(numpy.square(half)))
        self.assertGreater(
            spectral_density[int(frequency * note_duration)],
            numpy.average(spectral_density),
            'Output pitch is not present within the expected interval.')
def main(path="../../data/8bit-C4.wav"):
    """Read a WAV file, print it, and plot its first signal."""
    wav_control = wavio.read(path)
    print(wav_control)
    # Create the signals and plot the first one.
    to_signal(wav_control)[0].plot()
def feature_extract(path, feature_extraction, window, stride, feat_size, nfilt,
                    nfft, lowfreq, preemph, samplerate):
    """Extract the features named in ``feature_extraction`` from a WAV file.

    ``feature_extraction`` is searched for the substrings 'Hamming',
    'mfcc', 'fft_sam', 'log_abs_mel' and 'recurr'. ``window`` and
    ``stride`` are multiplied by 0.001 before being passed on as
    ``winlen``/``winstep``. With 'recurr', frames are regrouped into
    overlapping runs whose length and step are parsed from the ``@<len>_``
    and ``~<step>_`` parts of the string.

    Returns:
        The features as a float32 array.
    """
    print(path)
    tmp = []
    highfreq = samplerate / 2
    appendEnergy = True
    ceplifter = nfilt

    w = wavio.read(path)
    normalized_w = w.data / (2**16. / 2)

    if 'Hamming' in feature_extraction:
        winfunc = np.hamming
    else:
        def winfunc(x):
            return np.ones((x, ))

    # Arguments shared by every extractor.
    common = dict(signal=normalized_w,
                  samplerate=samplerate,
                  winlen=window * 0.001,
                  winstep=stride * 0.001,
                  nfft=nfft,
                  preemph=preemph,
                  winfunc=winfunc)

    # Take the abs FFT based features.
    if 'mfcc' in feature_extraction:
        print('mfcc')
        tmp = demfcc.mfcc_(numcep=feat_size,
                           nfilt=nfilt,
                           lowfreq=lowfreq,
                           highfreq=highfreq,
                           ceplifter=ceplifter,
                           appendEnergy=appendEnergy,
                           **common)
    elif 'fft_sam' in feature_extraction:
        print('fft_sam')
        tmp = demfcc.fft_sam(**common)
    elif 'log_abs_mel' in feature_extraction:
        print('log_abs_mel')
        tmp = demfcc.log_abs_mel(nfilt=nfilt,
                                 lowfreq=lowfreq,
                                 highfreq=highfreq,
                                 **common)

    if 'recurr' in feature_extraction:
        pair_len = int(re.search('@(.*?)_', feature_extraction).group(1))
        pair_step = int(re.search('~(.*?)_', feature_extraction).group(1))
        numpairs = math.floor((tmp.shape[0] - pair_len) / pair_step) + 1
        # Row i holds the frame indices i*pair_step .. i*pair_step+pair_len-1.
        offsets = np.tile(np.arange(0, pair_len), (numpairs, 1))
        starts = np.tile(
            np.arange(0, numpairs * pair_step, pair_step), (pair_len, 1)).T
        indices = np.array(offsets + starts, dtype=np.int32)
        tmp = tmp[indices]

    return np.float32(tmp)
def Rawdata_loading(self, filename, N):
    """Return the scaled first channel of a WAV file and its sample rate.

    Samples are divided by ``2 ** (bits - 1)``. The number of samples is
    printed. ``N`` is not used.
    """
    wav = wavio.read(filename)
    full_scale = float(2 ** (8 * wav.sampwidth - 1))
    scaled = wav.data / full_scale  # -1.0 to 1.0 (normalize)
    g = scaled[:, 0]
    print(len(g))
    return g, wav.rate
def detectClicks():
    """Mark high-energy frames of a test recording on its spectrogram.

    Frames whose median-filtered energy exceeds twice the 95th percentile
    are grouped with ``Segment.identifySegments`` and drawn as regions over
    the spectrogram in a Qt window. The call blocks in the Qt event loop.
    """
    import importlib
    import SignalProc
    # importlib.reload replaces the Python 2 builtin ``reload``.
    importlib.reload(SignalProc)
    import pyqtgraph as pg
    from pyqtgraph.Qt import QtCore, QtGui
    import wavio
    from scipy.signal import medfilt

    wavobj = wavio.read('Sound Files/test/DE66_BIRD_141011_005829.wav')
    fs = wavobj.rate
    data = wavobj.data
    # Compare the dtype by value: ``is not 'float'`` tests identity against
    # a string literal and is always true.
    if data.dtype != 'float':
        data = data.astype('float')  # / 32768.0
    # Keep only the first channel of multi-channel data.
    if data.ndim > 1:
        data = data[:, 0]

    sp = SignalProc.SignalProc(data, fs, 256, 128)
    sg = sp.spectrogram(data, multitaper=False)
    s = Segment(data, sg, sp, fs, 50)

    energy = np.sum(sg, axis=1)
    energy = medfilt(energy, 15)
    e2 = np.percentile(energy, 95)*2

    # Step 1: clicks have high energy.
    clicks = np.squeeze(np.where(energy > e2))
    clicks = s.identifySegments(clicks, minlength=1)

    app = QtGui.QApplication([])

    mw = QtGui.QMainWindow()
    mw.show()
    mw.resize(800, 600)

    win = pg.GraphicsLayoutWidget()
    mw.setCentralWidget(win)
    vb1 = win.addViewBox(enableMouse=False, enableMenu=False, row=0, col=0)
    im1 = pg.ImageItem(enableMouse=False)
    vb1.addItem(im1)
    im1.setImage(10.*np.log10(sg))

    for seg in clicks:
        a = pg.LinearRegionItem()
        a.setRegion([convertAmpltoSpec(seg[0], fs, 128), convertAmpltoSpec(seg[1], fs, 128)])
        vb1.addItem(a, ignoreBounds=True)

    QtGui.QApplication.instance().exec_()
def fFrq(dirName):
    """Report the fundamental frequency found in every WAV file of a tree.

    For each ``.wav`` file under ``dirName`` the pitch track from
    ``Segment.yin`` is restricted to values above the returned minimum
    frequency. A line is printed per file (or per detected segment) saying
    whether the pitch lies strictly between 850 and 4500, which is
    reported as "kiwi found".
    """
    for root, dirs, files in os.walk(str(dirName)):
        for file in files:
            if not file.endswith('.wav'):
                continue
            fileName = root + '/' + file
            wavobj = wavio.read(fileName)
            sampleRate = wavobj.rate
            data = wavobj.data
            # None of the following should be necessary for librosa.
            # Compare the dtype by value: ``is not 'float'`` tests identity
            # against a string literal and is always true.
            if data.dtype != 'float':
                data = data.astype('float')  # / 32768.0
            # Keep only the first channel of multi-channel data.
            if data.ndim > 1:
                data = data[:, 0]

            sp = SignalProc.SignalProc([], 0, 512, 256)
            sgRaw = sp.spectrogram(data, 512, 256, mean_normalise=True, onesided=True, multitaper=False)
            segment = Segment.Segment(data, sgRaw, sp, sampleRate, 512, 256)
            pitch, y, minfreq, W = segment.yin(minfreq=100)
            ind = np.squeeze(np.where(pitch > minfreq))
            pitch = pitch[ind]
            if pitch.size == 0:
                print(file, ' *++ no fundamental freq detected, could be faded kiwi or noise')
                continue
            ind = ind * W / 512
            from scipy.signal import medfilt
            # Median-filtered pitch, used for the per-segment means below.
            x = medfilt(pitch, 15)

            if ind.size < 2:
                if pitch > 850 and pitch < 4500:
                    print(file, round(pitch), ' *##kiwi found')
                else:
                    print(file, round(pitch), ' *-- fundamental freq is out of kiwi region, could be noise')
            else:
                # Get the individual pieces.
                segs = segment.identifySegments(ind, maxgap=10, minlength=5)
                if segs == []:
                    if np.mean(pitch) > 850 and np.mean(pitch) < 4500:
                        print(file, round(np.mean(pitch)), ' *## kiwi found ')
                    else:
                        print(file, round(np.mean(pitch)), ' *-- fundamental freq is out of kiwi region, could be noise')
                for s in segs:
                    s[0] = s[0] * sampleRate / float(256)
                    s[1] = s[1] * sampleRate / float(256)
                    i = np.where((ind > s[0]) & (ind < s[1]))
                    if np.mean(x[i]) > 850 and np.mean(x[i]) < 4500:
                        print(file, round(np.mean(x[i])), ' *## kiwi found ##')
                    else:
                        print(file, round(np.mean(x[i])), ' *-- fundamental freq is out of kiwi region, could be noise')
def eRatio(dirName):
    """ Print the result of postProcess.eRatioConfd for every .wav file under dirName.

    Args:
        dirName: root directory, walked recursively.
    """
    for root, dirs, files in os.walk(str(dirName)):
        for file in files:
            if not file.endswith('.wav'):
                continue
            # os.path.join instead of a hard-coded '\\' so this also works off Windows
            wavobj = wavio.read(os.path.join(root, file))
            sampleRate = wavobj.rate
            data = wavobj.data
            # The old test compared the array itself with the string 'float'
            # (always true); compare the dtype instead.
            if data.dtype != np.float64:
                data = data.astype('float')
            # Keep only the first channel
            if data.ndim > 1:
                data = data[:, 0]
            post = SupportClasses.postProcess(data, sampleRate, [])
            print(file, post.eRatioConfd(seg=None))
def loadData(self,fName, trainPerFile=False, wavOnly=False, savedetections=False):
    """ Load fName + '.wav' and, unless wavOnly, its per-second annotation fName + '-sec.txt'.

    Sets self.sampleRate and self.data (first channel, as float). The
    annotation file is tab-separated with the 0/1 presence flag in its second
    column, and must contain one row per second of audio.

    Args:
        fName: path of the recording without the '.wav' extension.
        trainPerFile: if True, self.annotation is replaced by this file's
            annotation; otherwise the annotation is appended to self.annotation.
        wavOnly: if True, only the audio is loaded.
        savedetections: if True, the wav file name is appended to self.filenames.

    Returns:
        None. If the wav file cannot be read nothing is modified; if the
        annotation length does not match, self.annotation is set to None.
    """
    # Load data
    filename = fName+'.wav'
    filenameAnnotation = fName+'-sec.txt'
    try:
        wavobj = wavio.read(filename)
    except Exception:
        # Previously execution continued and crashed on the unbound `wavobj`
        print("unsupported file: ", filename)
        return

    self.sampleRate = wavobj.rate
    self.data = wavobj.data
    # Compare dtypes by value: the old `is not 'float'` test was always true
    if self.data.dtype != np.float64:
        self.data = self.data.astype('float')
    # Keep only the first channel
    if self.data.ndim > 1:
        self.data = np.squeeze(self.data[:,0])
    n = math.ceil(len(self.data)/self.sampleRate)

    if wavOnly:
        return

    # Get the segmentation from the txt file
    with open(filenameAnnotation) as f:
        d = list(csv.reader(f, delimiter="\t"))
    # Drop a trailing blank line (guarded so an empty file does not raise)
    if d and d[-1] == []:
        d = d[:-1]
    if len(d) != n:
        print("ERROR: annotation length %d does not match file duration %d!" %(len(d), n))
        self.annotation = None
        return

    # for each second, store 0/1 presence:
    fileAnnotations = [int(row[1]) for row in d]
    presence = sum(fileAnnotations)

    # TWO VERSIONS FOR COMPATIBILITY WITH BOTH TRAINING LOOPS:
    if trainPerFile:
        self.annotation = np.array(fileAnnotations)
    else:
        # NOTE(review): the original indentation was lost; the bookkeeping
        # below is assumed to belong to the accumulating branch - confirm.
        self.annotation.extend(fileAnnotations)
        self.filelengths.append(n)
        if savedetections:
            self.filenames.append(filename)
    print("%d blocks read, %d presence blocks found. %d blocks stored so far.\n" % (n, presence, len(self.annotation)))
def length(dirName):
    """ Print duration statistics (in seconds) over all .wav files under dirName.

    Reports the minimum, maximum, mean, median and total duration. If no
    .wav file is found, a single notice is printed instead of raising on
    the empty list.

    Args:
        dirName: root directory, walked recursively.
    """
    durations = []
    for root, dirs, files in os.walk(str(dirName)):
        for filename in files:
            if filename.endswith('.wav'):
                wavobj = wavio.read(root + '/' + filename)
                # number of secs
                durations.append(len(wavobj.data) / wavobj.rate)

    if not durations:
        # min()/max() would raise ValueError on an empty list
        print("no .wav files found in: ", dirName)
        return

    print("min duration: ", min(durations), " secs")
    print("max duration: ", max(durations), " secs")
    print("mean duration: ", np.mean(durations), " secs")
    print("median duration: ", np.median(durations), " secs")
    print("total duration: ", sum(durations), " secs")
def read_aifs_or_wavs(in_dir, exts=['aif','wav'], module='G0', mix=False, trim=False, norm=False,
                      phase=False, rev=False, fade=256,
                      target={'G0':500000,'S0':200000,'W0':4000,'C0':12000}):
    """ Load, convert to mono and resample every matching audio file in in_dir.

    Files are found with glob (in_dir/*.ext for each ext). Files that fail to
    convert and files with an unsupported extension are skipped and listed on
    stdout once all files have been tried.

    Args:
        in_dir: directory to search (not recursive).
        exts: file extensions to look for. Not modified.
        module: key into target, passed on to dsp.resample.
        mix: passed to dsp.multi_to_mono.
        trim: if True, apply dsp.trim.
        norm: if True, apply dsp.normalize after resampling.
        phase: if True, time-stretch with dsp.phase_vocoder before resampling.
        rev: if True, apply dsp.reverse.
        fade: length passed to dsp.fade_out; no fade when <= 0.
        target: per-module target values passed to dsp.resample. Not modified.

    Returns:
        List with one processed signal per successfully converted file.
    """
    audio_files = []
    for ext in exts:
        audio_files += glob.glob(in_dir+'/*.'+ext)  # load the extensions that we want

    data, err, ns = [], [], []
    for audio_file in audio_files:
        try:
            print('processing %s'%audio_file)
            # search for aif / wav style file extension
            suffix = audio_file.rsplit('.')[-1].upper()
            is_aif = 'AIF' in suffix
            is_wav = 'WAV' in suffix
            if not is_aif and not is_wav:  # extension not supported
                ns.append(audio_file)
            else:
                reader = aifcio if is_aif else wavio
                mono, rate = dsp.multi_to_mono(reader.read(audio_file), mix)  # convert to mono
                if trim:
                    mono = dsp.trim(mono)
                if phase:
                    # timestretching via PV
                    mono = dsp.phase_vocoder(mono, rate, 1024, 1.0*target[module]/rate)
                resampled = dsp.resample(mono, target, module)  # up/down sample
                if norm:
                    resampled = dsp.normalize(resampled)  # normalize and clean final result
                if fade > 0:
                    resampled = dsp.fade_out(resampled, fade)  # exp fade out
                if rev:
                    resampled = dsp.reverse(resampled)  # option reverse
                data.append(resampled)
            # NOTE(review): original indentation was lost; the separator is
            # assumed to be printed once per file - confirm.
            print('---------------------------------------------------')
        except Exception:
            # Best effort: remember the file and carry on with the next one
            err.append(audio_file)

    # print() calls instead of Python 2 print statements, so the module
    # also parses on Python 3
    if len(err) > 0:
        print('Conversion errors with the following supported files:')
        for i in err:
            print(i)
    if len(ns) > 0:
        print('The following files have unsupported file types:')
        for i in ns:
            print(i)
    return data
def loadFile(filename):
    """ Read a wav file and run it through SupportClasses.preProcess.denoise_filter.

    Args:
        filename: path of the wav file.

    Returns:
        (audiodata, sampleRate) as returned by denoise_filter().
    """
    wavobj = wavio.read(filename)
    sampleRate = wavobj.rate
    audiodata = wavobj.data

    # None of the following should be necessary for librosa.
    # Compare dtypes by value: the old `is not 'float'` test was always true.
    if audiodata.dtype != np.float64:
        audiodata = audiodata.astype('float')
    # Keep only the first channel
    if audiodata.ndim > 1:
        audiodata = audiodata[:,0]

    # pre-process
    sc = SupportClasses.preProcess(audioData=audiodata, sampleRate=sampleRate, species='Kiwi', df=False)
    audiodata, sampleRate = sc.denoise_filter()
    return audiodata, sampleRate
def test1(self):
    """ Write data1 as a mono file with 3-byte samples and check it reads back unchanged. """
    tmpdir = tempfile.mkdtemp()
    wav_path = os.path.join(tmpdir, "test1data.wav")
    wavio.write(wav_path, data1, 44100, sampwidth=3)

    try:
        # Header as seen by the standard-library wave module
        reader = wave.open(wav_path, 'r')
        self.assertEqual(reader.getnchannels(), 1)
        self.assertEqual(reader.getsampwidth(), 3)
        self.assertEqual(reader.getframerate(), 44100)
        reader.close()

        # Contents as seen by wavio
        loaded = wavio.read(wav_path)
        self.assertEqual(loaded.rate, 44100)
        self.assertEqual(loaded.sampwidth, 3)
        self.assertEqual(loaded.data.dtype, np.int32)
        self.assertEqual(loaded.data.shape, (len(data1), 1))
        np.testing.assert_equal(loaded.data[:, 0], data1)
    finally:
        # Always remove the temporary file and its directory
        os.remove(wav_path)
        os.removedirs(tmpdir)
def main(args):
    """ Show the spectrogram GUI and feed it the samples of args.filename.

    The second channel of a multi-channel file is used (a single-channel file
    is transposed instead). If the file's rate differs from TS the signal is
    decimated by rate // TS. The samples are pushed to the panel from a
    WavThread while the wx main loop runs; the thread is told to stop once the
    main loop returns.

    Args:
        args: object with a `filename` attribute naming the wav file.
    """
    # setup gui
    app = wx.App()
    fr = wx.Frame(None, title='RADAR Spectrogram')
    fr.SetSize((SIZE,SIZE))
    panel = SpectroPanel(fr)

    # turn on sound card (live capture is currently disabled; the wav file
    # below is used as the source instead)
    p = pyaudio.PyAudio()

    wav = wavio.read(args.filename)
    data = wav.data
    rate = wav.rate
    if data.shape[1] > 1:
        data = data[:,1]
    else:
        data = data.transpose()
    if rate != TS:
        # decimate() needs an integer factor; `/` produces a float on Python 3
        data = sig.decimate(data, rate // TS, ftype='fir')

    stopFlag = threading.Event()
    thread = WavThread(stopFlag, data, panel)
    thread.start()

    fr.Show()
    app.MainLoop()

    # main loop finished: tell the worker thread to stop
    stopFlag.set()
def _writeSegment(filename, data, start, end, sampleRate):
    """ Save data[start:end] as a 16-bit wav file called filename. """
    wavio.write(filename, data[start:end].astype('int16'), sampleRate, scale='dtype-limits', sampwidth=2)


def extractSegments(wavFile, destination, copyName, species):
    """
    This extracts the sound segments given the annotation and the corresponding wav file.
    (Isabel's experiment data extraction)

    The annotation is read from wavFile + '.data'; segments whose start is -1
    are skipped. Any failure is reported as an unsupported file and swallowed.

    Args:
        wavFile: path of the wav file.
        destination: directory the extracted files are written to.
        copyName: if true, every segment is saved under the name stored in
            the segment itself (seg[4]).
        species: used when copyName is false. If empty, every segment is
            saved as '<wav name>-<index>.wav'; otherwise only segments whose
            seg[4] equals species are saved, padded by one second on each side.
    """
    datFile = wavFile + '.data'
    try:
        wavobj = wavio.read(wavFile)
        sampleRate = wavobj.rate
        data = wavobj.data
        if not os.path.isfile(datFile):
            return
        with open(datFile) as f:
            segments = json.load(f)

        cnt = 1
        for seg in segments:
            if seg[0] == -1:
                continue
            if copyName:
                # extract all - extracted sounds are saved with the same name as the
                # corresponding segment in the annotation (e.g. Rawhiti exp.)
                filename = destination + '\\' + seg[4] + '.wav'
                _writeSegment(filename, data, int(seg[0] * sampleRate), int(seg[1] * sampleRate), sampleRate)
            elif not species:
                # extract all - extracted sounds are saved with the original file name
                # followed by an index starting 1
                ind = wavFile.rindex('/')
                filename = destination + '\\' + str(wavFile[ind + 1:-4]) + '-' + str(cnt) + '.wav'
                cnt += 1
                _writeSegment(filename, data, int(seg[0] * sampleRate), int(seg[1] * sampleRate), sampleRate)
            elif species == seg[4]:
                # extract only specific calls - extracted sounds are saved with the
                # original file name followed by an index starting 1
                ind = wavFile.rindex('/')
                ind2 = wavFile.rindex('\\')
                filename = destination + '\\' + str(wavFile[ind2+1:ind]) + '-' + str(wavFile[ind + 1:-4]) + '-' + str(seg[4]) + '-' + str(cnt) + '.wav'
                cnt += 1
                # Pad by one second on each side. Clamp the start at 0: a negative
                # index would count from the end of the array and yield the wrong slice.
                s = max(0, int((seg[0]-1) * sampleRate))
                e = int((seg[1]+1) * sampleRate)
                _writeSegment(filename, data, s, e, sampleRate)
    except Exception:
        print ("unsupported file: ", wavFile)
def resample(dirName):
    """
    Resample to avoid high frq noise

    Every .wav file under dirName whose rate is not 16000 is resampled to
    16000 with librosa (first channel only) and saved as 16-bit
    '<name>_down.wav' next to the original.

    Args:
        dirName: root directory, walked recursively.
    """
    for root, dirs, files in os.walk(str(dirName)):
        for file in files:
            if not file.endswith('.wav'):
                continue
            # go through each segment
            file = root + '/' + file
            wavobj = wavio.read(file)
            audioData = wavobj.data
            # Compare dtypes by value: the old `is not 'float'` test was always true
            if audioData.dtype != np.float64:
                audioData = audioData.astype('float')
            # Keep only the first channel
            if audioData.ndim > 1:
                audioData = np.squeeze(audioData[:, 0])
            sampleRate = wavobj.rate
            if sampleRate != 16000:
                import librosa
                audioData = librosa.core.audio.resample(audioData, sampleRate, 16000)
                sampleRate = 16000
                # NOTE(review): original indentation was lost; the file is assumed
                # to be written only when it was actually resampled - confirm.
                wavio.write(file[:-4] + '_down.wav', audioData.astype('int16'), sampleRate, scale='dtype-limits', sampwidth=2)
def create_dataset(src_path, esc50_dst_path):
    """ Pack the wav files of src_path into one .npz archive, grouped by fold.

    For folds 1 to 5, the files matching '<fold>-*.wav' are read in sorted
    order. The first channel of each file is stored under 'sounds', and the
    integer after the last '-' of the file name under 'labels'. The archive
    has one entry 'fold<k>' per fold.

    Args:
        src_path: directory holding the wav files.
        esc50_dst_path: path of the archive passed to np.savez.
    """
    print('* {} -> {}'.format(src_path, esc50_dst_path))

    archive = {}
    for fold in range(1, 6):
        pattern = os.path.join(src_path, '{}-*.wav'.format(fold))
        sounds, labels = [], []
        for wav_path in sorted(glob.glob(pattern)):
            # first channel only
            sounds.append(wavio.read(wav_path).data.T[0])
            stem = os.path.splitext(wav_path)[0]
            labels.append(int(stem.split('-')[-1]))
        archive['fold{}'.format(fold)] = {'sounds': sounds, 'labels': labels}

    np.savez(esc50_dst_path, **archive)
def test_clip(self):
    """ Values outside 0..255 written with sampwidth=1 and scale='none' come back clipped. """
    tmpdir = tempfile.mkdtemp()
    wav_path = os.path.join(tmpdir, "testdata.wav")
    samples = np.array([-100, 0, 100, 200, 300, 325])
    wavio.write(wav_path, samples, 44100, sampwidth=1, scale='none')

    try:
        # Header as seen by the standard-library wave module
        reader = wave.open(wav_path, 'r')
        self.assertEqual(reader.getnchannels(), 1)
        self.assertEqual(reader.getsampwidth(), 1)
        self.assertEqual(reader.getframerate(), 44100)
        reader.close()

        # Contents as seen by wavio
        loaded = wavio.read(wav_path)
        self.assertEqual(loaded.rate, 44100)
        self.assertEqual(loaded.sampwidth, 1)
        self.assertEqual(loaded.data.dtype, np.uint8)
        self.assertEqual(loaded.data.shape, (len(samples), 1))
        clipped = np.array([0, 0, 100, 200, 255, 255], dtype=np.uint8).reshape(-1, 1)
        np.testing.assert_equal(loaded.data, clipped)
    finally:
        # Always remove the temporary file and its directory
        os.remove(wav_path)
        os.removedirs(tmpdir)
def test5(self):
    """ int16 samples written with sampwidth=2 and scale='none' are read back unchanged. """
    tmpdir = tempfile.mkdtemp()
    wav_path = os.path.join(tmpdir, "test5data.wav")
    samples = np.zeros(32, dtype=np.int16)
    samples[1::4] = 10000
    samples[3::4] = -10000
    wavio.write(wav_path, samples, 44100, sampwidth=2, scale='none')

    try:
        # Header as seen by the standard-library wave module
        reader = wave.open(wav_path, 'r')
        self.assertEqual(reader.getnchannels(), 1)
        self.assertEqual(reader.getsampwidth(), 2)
        self.assertEqual(reader.getframerate(), 44100)
        reader.close()

        # Contents as seen by wavio
        loaded = wavio.read(wav_path)
        self.assertEqual(loaded.rate, 44100)
        self.assertEqual(loaded.sampwidth, 2)
        self.assertEqual(loaded.data.dtype, np.int16)
        self.assertEqual(loaded.data.shape, (32, 1))
        np.testing.assert_equal(loaded.data, samples.reshape(-1, 1))
    finally:
        # Always remove the temporary file and its directory
        os.remove(wav_path)
        os.removedirs(tmpdir)
def readwav( path ):
    """ Read a wav file and scale its samples by 2**(bits per sample - 1).

    Returns:
        (wav, fs): the samples as a float array divided by
        2**(sampwidth*8 - 1), and the sample rate of the file.
    """
    loaded = wavio.read( path )
    full_scale = np.power(2, loaded.sampwidth*8-1)
    samples = loaded.data.astype(float)
    return samples / full_scale, loaded.rate
def showEnergies():
    """ Plot spectrograms and wavelet energies of the first five annotated segments.

    Works on the hard-coded recording 'Sound Files/tril1.wav' and its
    annotation file ('<recording>.data'). Three figures are produced: the
    spectrogram of each segment, the wavelet energy of each segment
    (computeWaveletEnergy_1s with 'dmey2'), and all energy curves overlaid.
    If the annotation file is missing or holds no segments, a notice is
    printed and nothing is plotted.
    """
    import pylab as pl
    import SignalProc
    pl.ion()

    filename = 'Sound Files/tril1.wav'
    wavobj = wavio.read(filename)
    sampleRate = wavobj.rate
    data = wavobj.data
    # Compare dtypes by value: the old `is not 'float'` test was always true
    if data.dtype != np.float64:
        data = data.astype('float')
    # Keep only the first channel
    if data.ndim > 1:
        data = np.squeeze(data[:, 0])

    # Without the annotation file there are no segments to show (previously
    # this fell through and crashed on the unbound `segments`)
    if not os.path.isfile(filename + '.data'):
        print("no annotation file found for: ", filename)
        return
    with open(filename + '.data', 'r') as f:
        segments = json.load(f)
    # Skip the leading entry whose start is -1
    if len(segments) > 0 and segments[0][0] == -1:
        del segments[0]

    # The figures have five rows, so at most five segments are shown
    pieces = [data[int(seg[0]*sampleRate):int(seg[1]*sampleRate)] for seg in segments[:5]]
    if not pieces:
        print("no segments found in: ", filename + '.data')
        return

    # Figure 1: spectrogram of each segment
    sp = SignalProc.SignalProc(pieces[-1], sampleRate)
    pl.figure()
    for row, piece in enumerate(pieces, start=1):
        pl.subplot(5, 1, row)
        sg = sp.spectrogram(piece, sampleRate)
        pl.imshow(10.*np.log10(sg))

    # Figure 2: wavelet energy of each segment
    pl.figure()
    energies = []
    for row, piece in enumerate(pieces, start=1):
        e = WaveletSegment.computeWaveletEnergy_1s(piece, 'dmey2')
        pl.subplot(5, 1, row)
        pl.plot(e)
        energies.append(e)

    # Figure 3: all energy curves overlaid
    pl.figure()
    for e in energies:
        pl.plot(e)

    pl.show()
wavelet = pywt.Wavelet(filter_bank=[lowd, highd, lowr, highr]) wavelet.orthogonal=True for t in range(totalTime): E = [] for level in range(1, 6): wp = pywt.WaveletPacket(data=fwData[t * sampleRate:(t + 1) * sampleRate], wavelet=wavelet, maxlevel=level) e = np.array([np.sum(n.data ** 2) for n in wp.get_level(level, "natural")]) if np.sum(e) > 0: e = 100.0 * e / np.sum(e) E = np.concatenate((E, e), axis=0) coefs[:, t] = E return coefs import wavio wavobj = wavio.read('Sound Files/tril1.wav') sampleRate = wavobj.rate data = np.squeeze(wavobj.data) if data.dtype is not 'float': data = data.astype('float') #/ 32768.0 if np.shape(np.shape(data))[0]>1: data = np.squeeze(data[:,0]) coefs = computeWaveletEnergy(data, sampleRate) clf = joblib.load('ruruClassifier.pkl') out=[] for i in range(int(np.shape(coefs)[1])): E = np.ones((1,62)) * coefs[:,i] p = clf.predict(E) # if p==0.0:
def main():
    """ Dump the RIFF chunks of each listed wav file, plot it, and repeat for a float32 copy.

    Every readable file is re-written as 'fwd-<index>.wav' with
    dtype=np.float32; the copy is then dumped and plotted the same way.
    Files that are not RIFF WAVE are reported and skipped. All figures are
    shown at the end.
    """
    files = [
        # "/home/nhilton/development/nsound/src/examples/california.wav",
        # "/home/nhilton/development/nsound/src/examples/mynameis.wav",
        # "/home/nhilton/development/nsound/src/examples/Temperature_in.wav",
        # "/home/nhilton/development/nsound/src/examples/walle.wav",
        # "/home/nhilton/development/nsound/src/examples/example1",
        # "empty.bin",
        "chirp1.wav",
    ]

    def report(path, chunks):
        # Print the chunk table as indented JSON, then plot the first channel
        text = json.dumps(chunks, indent = 4, separators = (', ', ' : '), sort_keys = True)
        for row in text.split('\n'):
            print(' %s' % row)
        samples, rate = wavio.read(path)
        if samples.ndim > 1:
            samples = samples[:,0]
        plt.figure()
        plt.plot(samples, 'b-')
        plt.grid(True)
        plt.xlabel('sample bin')
        plt.ylabel('amplitude')
        plt.title('wav = %s' % path)
        return samples, rate

    for index, source in enumerate(files):
        print('-------------------------------------------------------')
        print('Reading file')
        print(' in: %s' % source)
        try:
            source_chunks = wavio.read_chunks(source)
        except wavio.InvalidRiffWave:
            print(" Not a RIFF WAVE!")
            continue
        x, sr = report(source, source_chunks)

        # write out forward & reverse
        fout = 'fwd-%02d.wav' % index
        wavio.write(fout, x, sr, dtype = np.float32)
        print('Wrote %s' % fout)

        # same dump for the file that was just written
        report(fout, wavio.read_chunks(fout))

    plt.show()
def deleteClick2(dirName):
    """
    Given the directory of sounds this deletes the annotation segments with rain corrupted.
    Check to make sure the segment to delete has no sign of kiwi - use fundamental frq rather than eRatio in 'deleteClick'

    For every '<name>.data' annotation with a matching '<name>' sound file, a
    segment is deleted when the minimum of its second MFCC row is below
    (mean - 2*std) of the same row for the whole file AND the mean pitch
    found by yin() on the denoised, 16 kHz segment is not strictly between
    500 and 5000. The annotation file is rewritten only if a segment was
    removed.

    Args:
        dirName: root directory, walked recursively.
    """
    for root, dirs, files in os.walk(str(dirName)):
        for file in files:
            if not (file.endswith('.data') and file[:-5] in files):
                continue
            # go through each segment
            file = root + '/' + file
            wavFile = file[:-5]
            with open(file) as f:
                segments = json.load(f)
            newSegments = copy.deepcopy(segments)

            wavobj = wavio.read(wavFile)
            # Always scale: the old `is not 'float'` guard was always true
            audioData = (wavobj.data / 32768.0)[:, 0].squeeze()
            fileRate = wavobj.rate
            if fileRate != 16000:
                audioData = librosa.core.audio.resample(audioData, fileRate, 16000)
                fileRate = 16000

            # NOTE(review): `post` is only needed by the disabled eRatio
            # checks; kept in case the constructor has side effects.
            post = SupportClasses.postProcess(audioData, fileRate, [])
            print(file)

            thr = None  # mfcc1 thr for the file, computed on first use
            chg = False
            for seg in segments:
                if seg[0] == -1:
                    continue
                # read the sound segment
                secs = seg[1] - seg[0]
                wavobj = wavio.read(wavFile, nseconds=secs, offset=seg[0])
                sampleRate = wavobj.rate
                data = (wavobj.data / 32768.0)[:, 0].squeeze()

                # check for clicks
                mfcc1 = Features.Features(data, sampleRate).get_mfcc()[1, :]  # mfcc1 of the segment
                if thr is None:
                    # Use the rate of the (possibly resampled) whole-file audio;
                    # the segment's own rate was passed here before.
                    fileMfcc1 = Features.Features(audioData, fileRate).get_mfcc()[1, :]
                    thr = np.mean(fileMfcc1) - 2 * np.std(fileMfcc1)
                if not (np.min(mfcc1) < thr):
                    continue

                # now check f. frq.
                # down sample to avoid higher frq noise
                if sampleRate != 16000:
                    data = librosa.core.audio.resample(data, sampleRate, 16000)
                    sampleRate = 16000
                # denoise prior to f frq detection
                waveletDenoiser = WaveletFunctions.WaveletFunctions(data=data, wavelet=None, maxLevel=12)
                data = waveletDenoiser.waveletDenoise(data, thresholdType='soft', wavelet='dmey2', maxLevel=12)
                sp = SignalProc.SignalProc([], 0, 512, 256)
                sgRaw = sp.spectrogram(data, 512, 256, mean_normalise=True, onesided=True, multitaper=False)
                segment = Segment.Segment(data, sgRaw, sp, sampleRate, 512, 256)
                pitch, y, minfreq, W = segment.yin()
                ind = np.squeeze(np.where(pitch > minfreq))
                pitch = pitch[ind]
                ff = np.mean(pitch)
                if ff > 500 and ff < 5000:
                    # fundamental frequency present: keep the segment
                    continue
                print(seg)
                newSegments.remove(seg)
                chg = True

            if chg:
                # `with` closes the file so the new annotation is flushed to disk
                with open(file, 'w') as out:
                    json.dump(newSegments, out)
def deleteClick(dirName):
    """
    Given the directory of sounds this deletes the annotation segments with wind/rain corrupted files.
    Targeting moderate wind and above. Check to make sure the segment to delete has no sign of kiwi

    For every '<name>.data' annotation with a matching '<name>' sound file, a
    segment is deleted when the minimum of its second MFCC row is below
    (mean - 2*std) of the same row for the whole file and the segment is
    not longer than 10 seconds. The annotation file is rewritten only if a
    segment was removed.

    Args:
        dirName: root directory, walked recursively.
    """
    for root, dirs, files in os.walk(str(dirName)):
        for file in files:
            if not (file.endswith('.data') and file[:-5] in files):
                continue
            # go through each segment
            file = root + '/' + file
            wavFile = file[:-5]
            with open(file) as f:
                segments = json.load(f)
            newSegments = copy.deepcopy(segments)

            wavobj = wavio.read(wavFile)
            # Always scale: the old `is not 'float'` guard was always true
            audioData = (wavobj.data / 32768.0)[:, 0].squeeze()
            sampleRate = wavobj.rate

            # NOTE(review): `post` is only needed by the disabled eRatio
            # checks; kept in case the constructor has side effects.
            post = SupportClasses.postProcess(audioData, sampleRate, [])
            print(file)

            # mfcc1 thr for the file: identical for every segment, so it is
            # computed once, on first use, instead of once per segment
            thr = None
            chg = False
            for seg in segments:
                if seg[0] == -1:
                    continue
                # read the sound segment
                secs = seg[1] - seg[0]
                wavobj = wavio.read(wavFile, nseconds=secs, offset=seg[0])
                data = (wavobj.data / 32768.0)[:, 0].squeeze()

                # check for clicks
                mfcc1 = Features.Features(data, sampleRate).get_mfcc()[1, :]  # mfcc1 of the segment
                if thr is None:
                    fileMfcc1 = Features.Features(audioData, sampleRate).get_mfcc()[1, :]
                    thr = np.mean(fileMfcc1) - 2 * np.std(fileMfcc1)
                if not (np.min(mfcc1) < thr):
                    continue

                # just check duration>10 sec
                if secs > 10:
                    continue
                print(seg)
                newSegments.remove(seg)
                chg = True

            if chg:
                # `with` closes the file so the new annotation is flushed to disk
                with open(file, 'w') as out:
                    json.dump(newSegments, out)
def deleteWindRain(dirName, windTest=True, rainTest=False, Tmean_wind = 1e-8):
    """
    Given the directory of sounds this deletes the annotation segments with wind/rain corrupted files.
    Targeting moderate wind and above. Check to make sure the segment to delete has no sign of kiwi
    Automatic Identification of Rainfall in Acoustic Recordings by Carol Bedoya, Claudia Isaza, Juan M.Daza, and Jose D.Lopez

    For every '<name>.data' annotation with a matching '<name>' sound file,
    the Welch power spectral density of each segment is examined:
      * wind test: the segment is deleted when the mean PSD in the
        100-250 Hz band exceeds Tmean_wind and the segment is not longer
        than 10 seconds;
      * rain test: the segment is deleted when mean/std of the PSD in the
        600-1200 Hz band exceeds 3.5 and its eRatio is not larger than that
        of the 10 seconds before it.
    The annotation file is rewritten only if a segment was removed.

    Args:
        dirName: root directory, walked recursively.
        windTest: run the wind test.
        rainTest: run the rain test.
        Tmean_wind: mean-PSD threshold of the wind test.
    """
    # Todo: find thrs
    Tsnr_rain = 3.5  # SNR threshold

    cnt = 0
    for root, dirs, files in os.walk(str(dirName)):
        for file in files:
            if not (file.endswith('.data') and file[:-5] in files):
                continue
            # go through each segment
            file = root + '/' + file
            wavFile = file[:-5]
            with open(file) as f:
                segments = json.load(f)
            newSegments = copy.deepcopy(segments)

            wavobj = wavio.read(wavFile)
            # Always scale: the old `is not 'float'` guard was always true
            audioData = (wavobj.data / 32768.0)[:, 0].squeeze()
            sampleRate = wavobj.rate
            post = SupportClasses.postProcess(audioData, sampleRate, [])

            # Band edges in normalised frequency (fraction of Nyquist);
            # they only depend on the file's rate
            wind_lower = 2.0 * 100 / sampleRate
            wind_upper = 2.0 * 250 / sampleRate
            rain_lower = 2.0 * 600 / sampleRate
            rain_upper = 2.0 * 1200 / sampleRate

            chg = False
            for seg in segments:
                if seg[0] == -1:
                    continue
                # read the sound segment and check for wind
                secs = seg[1] - seg[0]
                wavobj = wavio.read(wavFile, nseconds=secs, offset=seg[0])
                data = (wavobj.data / 32768.0)[:, 0].squeeze()

                f, p = signal.welch(data, fs=sampleRate, window='hamming', nperseg=512, detrend=False)
                # A segment can only be removed once, even if both tests reject it
                removed = False

                if windTest:
                    limite_inf = int(round(len(p) * wind_lower))  # lower edge of the wind band (100 Hz)
                    limite_sup = int(round(len(p) * wind_upper))  # upper edge of the wind band (250 Hz)
                    # section of interest of the power spectral density. Step 2 in Algorithm 2.1
                    a_wind = p[limite_inf:limite_sup]
                    # mean of the PSD in the frequency band of interest. Upper part of the step 3 in Algorithm 2.1
                    mean_a_wind = np.mean(a_wind)
                    # NOTE(review): original indentation was lost; "not windy" is
                    # assumed to be the else of the threshold test - confirm.
                    if mean_a_wind > Tmean_wind:
                        # just check duration>10 sec
                        potentialCall = secs > 10
                        if not potentialCall:
                            print(file, seg, "--> windy")
                            newSegments.remove(seg)
                            removed = True
                            chg = True
                    else:
                        print(file, seg, "--> not windy")

                if rainTest:
                    limite_inf = int(round(len(p) * rain_lower))  # lower edge of the rainfall band (600 Hz)
                    limite_sup = int(round(len(p) * rain_upper))  # upper edge of the rainfall band (1200 Hz)
                    # section of interest of the power spectral density. Step 2 in Algorithm 2.1
                    a_rain = p[limite_inf:limite_sup]
                    mean_a_rain = np.mean(a_rain)
                    std_a_rain = np.std(a_rain)
                    # signal to noise ratio of the analysed recording. step 3 in Algorithm 2.1
                    c_rain = mean_a_rain / std_a_rain
                    if c_rain > Tsnr_rain:
                        # check if it is not kiwi
                        eRatio = post.eRatioConfd(seg)
                        eRatioBefore = post.eRatioConfd([seg[0] - 10, seg[0], "", ""])
                        potentialCall = eRatio > eRatioBefore
                        if not potentialCall:
                            # was logged as "windy" (copy-paste slip)
                            print(file, seg, "--> rainy")
                            if not removed:
                                newSegments.remove(seg)
                                removed = True
                            chg = True
                    else:
                        print(file, "--> not rainy")

            if chg:
                # `with` closes the file so the new annotation is flushed to disk
                with open(file, 'w') as out:
                    json.dump(newSegments, out)
            # NOTE(review): assumed to be a per-file progress counter - confirm.
            cnt += 1
            print(file, cnt)
def annotation2GT(wavFile,species,duration=0):
    """
    This generates the ground truth for a given sound file (currently for kiwi and bittern).
    Given the AviaNZ annotation, returns the ground truth as a txt file

    The output '<name>-sec.txt' has one tab-separated row per second:
    time (starting at 1), presence ('0'/'1'), call type and quality.
    Rows are marked from floor(start) to ceil(end) of every annotated
    segment whose label (seg[4]) matches `species`.

    Args:
        wavFile: path of the wav file; the annotation is wavFile + '.data'.
        species: regular expression searched in the segment label. For
            'Bittern' the call type is B/I (or empty); for anything else it
            is M/F/D, defaulting to K.
        duration: length of the recording in seconds; if 0 it is taken from
            the wav file (whole seconds).
    """
    datFile = wavFile + '.data'
    eFile = datFile[:-9] + '-sec.txt'
    if duration == 0:
        wavobj = wavio.read(wavFile)
        # number of secs; floor division keeps this an int on Python 3 too
        duration = len(wavobj.data) // wavobj.rate
    duration = int(duration)

    # One row per second: [time, presence, call type, quality]
    GT = [[str(i), '0', '', ''] for i in range(1, duration + 1)]

    if os.path.isfile(datFile):
        print (datFile)
        with open(datFile) as f:
            segments = json.load(f)
        for seg in segments:
            if seg[0] == -1:
                continue
            if not re.search(species, seg[4]):
                continue
            label = str(seg[4])

            # The old test `species=='Kiwi' or 'Gsk'` was always true, which
            # made the Bittern branch unreachable. Everything that is not
            # Bittern keeps the kiwi-style typing it got before.
            if species == 'Bittern':
                # check boom/inhalation
                if '(B)' in label:
                    callType = 'B'
                elif '(I)' in label:
                    callType = 'I'
                else:
                    callType = ''
            else:
                # check M/F
                if '(M)' in label:
                    callType = 'M'
                elif '(F)' in label:
                    callType = 'F'
                elif '(D)' in label:
                    callType = 'D'
                else:
                    callType = 'K'

            # check quality: the first digit found, in this order, wins
            quality = ''
            for digit, stars in (('1', '*****'),   # v close
                                 ('2', '****'),    # close
                                 ('3', '***'),     # fade
                                 ('4', '**'),      # v fade
                                 ('5', '*')):      # v v fade
                if re.search(digit, seg[4]):
                    quality = stars
                    break

            # Clamp to the recording so a segment touching the end (or a
            # negative start) cannot index outside GT
            s = max(int(math.floor(seg[0])), 0)
            e = min(int(math.ceil(seg[1])), duration)
            for i in range(s, e):
                GT[i][1] = str(1)
                GT[i][2] = callType
                GT[i][3] = quality

    # now save GT as a .txt file
    with open(eFile, "w") as out:
        for line in GT:
            out.write("\t".join(line) + "\n")