def transcribe_audio(self, stereo_path, channels_to_process):
    """Tokenize the requested legs of a stereo recording and batch the segments.

    The file is split with ``self.split_to_mono``; each selected leg is run
    through a ``StreamTokenizer`` and every detected segment is collected as
    ``(leg_label, frames, start, end)``. Segments are sorted by their ``end``
    value and handed to ``self.batch``.

    Args:
        stereo_path: Path of the audio file to process.
        channels_to_process: ``'A'``, ``'B'`` or ``'AB'``; selects the leg(s)
            that are tokenized. Any other value tokenizes nothing.

    Raises:
        Exception: If ``stereo_path`` is not an existing file.
    """
    if not os.path.isfile(stereo_path):
        raise Exception("Audio file does not exist.")

    data = self.split_to_mono(stereo_path)
    legs = {'A': data['a_leg'], 'B': data['b_leg']}
    # The legs are removed from ``data``; only the local mapping keeps them.
    data['a_leg'] = None
    data['b_leg'] = None

    validator = AudioEnergyValidator(sample_width=data['frame_width'],
                                     energy_threshold=45)
    trimmer = StreamTokenizer(validator,
                              min_length=self.min_segment_length,
                              max_length=self.max_segment_length,
                              max_continuous_silence=self.max_continuous_silence,
                              mode=StreamTokenizer.DROP_TAILING_SILENCE)

    segments = []
    for label in ('A', 'B'):
        if channels_to_process not in (label, 'AB'):
            continue
        # NOTE(review): ``/`` yields a float block size on Python 3 - confirm
        # that ``self.divisor`` divides the frame rate as intended.
        source = ADSFactory.ads(audio_source=legs[label], record=True,
                                block_size=data['frame_rate'] / self.divisor)
        source.open()
        try:
            # ``label=label`` binds the current leg at definition time.
            trimmer.tokenize(
                source,
                callback=lambda frames, start, end, label=label:
                    segments.append((label, frames, start, end)))
        finally:
            # Previously the source was left open after tokenizing.
            source.close()

    segments.sort(key=lambda segment: segment[3])
    self.batch(segments, data['duration'], data['frame_rate'],
               data['frame_width'], data['nchannels'])
def test_block_size(self):
    """Check get_block_size() for the ``block_size`` keyword and its ``bs`` alias."""
    cases = (
        ({"block_size": 512}, 512,
         "Wrong block_size, expected: 512, found: {0}"),
        # with alias keyword
        ({"bs": 160}, 160,
         "Wrong block_size, expected: 160, found: {0}"),
    )
    for options, expected, template in cases:
        ads = ADSFactory.ads(audio_source=self.audio_source, **options)
        found = ads.get_block_size()
        self.assertEqual(found, expected, template.format(found))
def test_block_duration(self):
    """Check the block size derived from ``block_dur`` and its ``bd`` alias."""
    cases = (
        # 10 ms
        ({"block_dur": 0.01}, 160,
         "Wrong block_size, expected: 160, found: {0}"),
        # with alias keyword, 25 ms
        ({"bd": 0.025}, 400,
         "Wrong block_size, expected: 400, found: {0}"),
    )
    for options, expected, template in cases:
        ads = ADSFactory.ads(audio_source=self.audio_source, **options)
        found = ads.get_block_size()
        self.assertEqual(found, expected, template.format(found))
def test_hop_duration(self):
    """Check ``hop_size`` derived from ``hop_dur``, with ``block_dur`` and ``bd``."""
    ads = ADSFactory.ads(audio_source=self.audio_source,
                         block_dur=0.02, hop_dur=0.01)  # 10 ms hop
    size = ads.hop_size
    self.assertEqual(size, 160,
                     "Wrong hop_size, expected: 160, found: {0}".format(size))

    # with alias keyword for the block duration
    ads = ADSFactory.ads(audio_source=self.audio_source,
                         bd=0.025, hop_dur=0.015)  # 15 ms hop
    size = ads.hop_size
    # The message used to say "block_size" although hop_size is checked.
    self.assertEqual(size, 240,
                     "Wrong hop_size, expected: 240, found: {0}".format(size))
def test_block_size(self):
    """Verify the block size set through ``block_size`` and through ``bs``."""
    source = ADSFactory.ads(audio_source=self.audio_source, block_size=512)
    reported = source.get_block_size()
    message = "Wrong block_size, expected: 512, found: {0}".format(reported)
    self.assertEqual(reported, 512, message)

    # with alias keyword
    source = ADSFactory.ads(audio_source=self.audio_source, bs=160)
    reported = source.get_block_size()
    message = "Wrong block_size, expected: 160, found: {0}".format(reported)
    self.assertEqual(reported, 160, message)
def test_block_duration(self):
    """Verify the block size computed from ``block_dur`` and from ``bd``."""
    # 10 ms
    source = ADSFactory.ads(audio_source=self.audio_source, block_dur=0.01)
    reported = source.get_block_size()
    message = "Wrong block_size, expected: 160, found: {0}".format(reported)
    self.assertEqual(reported, 160, message)

    # with alias keyword, 25 ms
    source = ADSFactory.ads(audio_source=self.audio_source, bd=0.025)
    reported = source.get_block_size()
    message = "Wrong block_size, expected: 400, found: {0}".format(reported)
    self.assertEqual(reported, 400, message)
def split(filename='g1238-20181214-081712-1544750232.37681.wav'):
    """Detect audio activity in a wave file and write each region to its own file.

    The waveform is plotted with ``plt.plot``, every detected token is written
    to ``token_<i>.wav`` in the current directory and the list of token
    durations is printed.

    Args:
        filename: Path of the wave file to split.

    Returns:
        A list of ``[start_sample, end_sample]`` pairs, one per detected token.
    """
    sr, samples = wavfile.read(filename=filename, mmap=True)
    plt.plot(samples)

    asource = ADSFactory.ads(filename=filename, record=False)
    # Samples per analysis window. This was hard-coded to 80 (and the rate to
    # 8000), which only holds for an 8 kHz file with the default 10 ms window.
    window = asource.get_block_size()
    validator = AudioEnergyValidator(sample_width=asource.get_sample_width(),
                                     energy_threshold=50)
    # Lengths below are expressed in analysis windows:
    # min_length=100            : shortest accepted audio activity
    # max_length=500            : longest accepted audio activity
    # max_continuous_silence=50 : longest silence tolerated inside an activity
    tokenizer = StreamTokenizer(validator=validator, min_length=100,
                                max_length=500, max_continuous_silence=50)

    asource.open()
    try:
        tokens = tokenizer.tokenize(asource)
    finally:
        # Release the source even when tokenizing fails.
        asource.close()

    stack = []
    durations = []  # formerly named ``sum``, which shadowed the builtin
    for i, (_, start, end) in enumerate(tokens):
        first, last = start * window, end * window
        stack.append([first, last])
        durations.append((last - first) / sr)
        wavfile.write('token_' + str(i) + '.wav', sr, samples[first:last])

    print(durations)
    return stack
def test_data_buffer_alias(self):
    """Check that the ``db`` alias passes the buffer to the audio source."""
    ads = ADSFactory.ads(db=self.signal, sampling_rate=16,
                         sample_width=2, channels=1)
    stored = ads.get_audio_source().get_data_buffer()
    self.assertEqual(stored, self.signal, "Wrong value for data buffer")
def test_Recorder_Deco_rewind_and_read(self):
    """Read 10 blocks, rewind, and compare the replayed data with the file."""
    ads = ADSFactory.ads(audio_source=self.audio_source, record=True,
                         block_size=320)
    ads.open()
    for _ in range(10):
        ads.read()
    ads.rewind()

    # read all available data after rewind (read() returns None when done)
    replayed = b''.join(iter(ads.read, None))
    ads.close()

    reference = WaveAudioSource(
        filename=dataset.one_to_six_arabic_16000_mono_bc_noise)
    reference.open()
    expected = reference.read(320 * 10)
    reference.close()

    self.assertEqual(replayed, expected,
                     "Unexpected data read from RecorderADS")
def test_Limiter_Overlap_Deco_read(self):
    """Compare overlapping blocks with manual reads of the same wave data."""
    block_size = 256
    hop_size = 200
    ads = ADSFactory.ads(audio_source=self.audio_source, max_time=0.50,
                         block_size=block_size, hop_size=hop_size)

    # Read all available data overlapping blocks
    ads.open()
    overlapping_blocks = list(iter(ads.read, None))
    ads.close()

    # Read all data from file and build a BufferAudioSource
    wave_file = wave.open(dataset.one_to_six_arabic_16000_mono_bc_noise, "r")
    wave_data = wave_file.readframes(wave_file.getnframes())
    wave_file.close()
    reference = BufferAudioSource(wave_data, ads.get_sampling_rate(),
                                  ads.get_sample_width(), ads.get_channels())
    reference.open()

    # Compare all blocks read from OverlapADS to those read
    # from an audio source with a manual set_position
    for index, block in enumerate(overlapping_blocks):
        expected = reference.read(block_size)
        self.assertEqual(
            block, expected,
            "Unexpected block (N={0}) read from OverlapADS".format(index))
        reference.set_position((index + 1) * hop_size)

    reference.close()
def test_sample_width(self):
    """Check that the data source reports a sample width of 2."""
    reported = ADSFactory.ads(audio_source=self.audio_source).get_sample_width()
    message = "Wrong sample width, expected: 2, found: {0}".format(reported)
    self.assertEqual(reported, 2, message)
def test_Limiter_Deco_read_limit(self):
    """Check the number of bytes delivered when reading is capped at 1.191 s."""
    # read a maximum of 1.191 seconds from audio source
    ads = ADSFactory.ads(audio_source=self.audio_source, max_time=1.191)

    total_samples = round(ads.sampling_rate * 1.191)
    full_blocks, remainder = divmod(total_samples, ads.block_size)
    samples_to_read = full_blocks * ads.block_size + remainder
    expected_read_bytes = samples_to_read * ads.sw * ads.channels

    ads.open()
    # read() returns None once the source is exhausted
    total_read = sum(len(block) for block in iter(ads.read, None))
    ads.close()

    err_msg = ("Wrong data length read from LimiterADS, expected: {0}, "
               "found: {1}")
    self.assertEqual(total_read, expected_read_bytes,
                     err_msg.format(expected_read_bytes, total_read))
def test_Recorder_Deco_rewind_and_read(self):
    """After 10 reads and a rewind, the replayed data must match the file."""
    nb_blocks = 10
    recorder = ADSFactory.ads(audio_source=self.audio_source, record=True,
                              block_size=320)
    recorder.open()
    for _ in range(nb_blocks):
        recorder.read()
    recorder.rewind()

    # read all available data after rewind
    chunks = []
    block = recorder.read()
    while block is not None:
        chunks.append(block)
        block = recorder.read()
    recorder.close()
    ads_data = b''.join(chunks)

    wave_source = WaveAudioSource(
        filename=dataset.one_to_six_arabic_16000_mono_bc_noise)
    wave_source.open()
    wave_data = wave_source.read(320 * nb_blocks)
    wave_source.close()

    self.assertEqual(ads_data, wave_data,
                     "Unexpected data read from RecorderADS")
def test_Recorder_Overlap_Deco_is_rewindable(self):
    """A recording, overlapping data source must report itself rewindable."""
    options = {"block_size": 320, "hop_size": 160, "record": True}
    ads = ADSFactory.ads(audio_source=self.audio_source, **options)
    rewindable = ads.is_rewindable()
    self.assertTrue(rewindable,
                    "RecorderADS.is_rewindable should return True")
def test_hop_duration_alias(self):
    """Check that the ``hd`` alias sets ``hop_size`` and yields an OverlapADS."""
    ads = ADSFactory.ads(data_buffer=self.signal, sampling_rate=16,
                         sample_width=2, channels=1, bd=0.75, hd=0.5)
    size = ads.hop_size
    # The message used to name block_size/bs although hop_size/hd is checked.
    self.assertEqual(
        size, 8,
        "Wrong hop_size using hd alias, expected: 8, found: {0}".format(size))
    self.assertIsInstance(ads, ADSFactory.OverlapADS,
                          "ads expected to an ADSFactory.OverlapADS object")
def test_sampling_rate(self):
    """Check that the data source reports a sampling rate of 16000."""
    reported = ADSFactory.ads(audio_source=self.audio_source).get_sampling_rate()
    message = "Wrong sampling rate, expected: 16000, found: {0}".format(reported)
    self.assertEqual(reported, 16000, message)
def test_default_block_size(self):
    """Check that ``block_size`` is 160 when no size is requested."""
    found = ADSFactory.ads(audio_source=self.audio_source).block_size
    message = "Wrong default block_size, expected: 160, found: {0}".format(found)
    self.assertEqual(found, 160, message)
def test_channels(self):
    """Check that the data source reports a single channel."""
    reported = ADSFactory.ads(audio_source=self.audio_source).get_channels()
    message = "Wrong number of channels, expected: 1, found: {0}".format(reported)
    self.assertEqual(reported, 1, message)
def test_sample_width_alias(self):
    """Check that the ``sw`` alias sets the sample width."""
    options = {"data_buffer": self.signal, "sampling_rate": 16,
               "sw": 2, "channels": 1}
    reported = ADSFactory.ads(**options).get_sample_width()
    message = "Wrong sample width, expected: 2, found: {0}".format(reported)
    self.assertEqual(reported, 2, message)
def test_sampling_rate_alias(self):
    """Check that the ``sr`` alias sets the sampling rate."""
    ads = ADSFactory.ads(data_buffer=self.signal, sr=16,
                         sample_width=2, channels=1)
    srate = ads.get_sampling_rate()
    # The message used to announce 16000 although 16 is the expected value.
    self.assertEqual(
        srate, 16,
        "Wrong sampling rate, expected: 16, found: {0}".format(srate))
def setUp(self):
    """Create a 32-byte signal and a data source reading it in blocks of 4."""
    self.signal = b"ABCDEFGHIJKLMNOPQRSTUVWXYZ012345"
    source_options = {
        "sampling_rate": 16,
        "sample_width": 2,
        "channels": 1,
        "block_size": 4,
    }
    self.ads = ADSFactory.ads(data_buffer=self.signal, **source_options)
def test_max_time_alias(self):
    """Check that the ``mt`` alias produces a LimiterADS."""
    options = {"data_buffer": self.signal, "sampling_rate": 16,
               "sample_width": 2, "channels": 1, "mt": 10}
    limited = ADSFactory.ads(**options)
    self.assertIsInstance(limited, ADSFactory.LimiterADS,
                          "ads expected to an ADSFactory.LimiterADS object")
def test_record_alias(self):
    """Check that the ``rec`` alias produces a RecorderADS."""
    options = {"data_buffer": self.signal, "sampling_rate": 16,
               "sample_width": 2, "channels": 1, "rec": True}
    recorder = ADSFactory.ads(**options)
    self.assertIsInstance(recorder, ADSFactory.RecorderADS,
                          "ads expected to an ADSFactory.RecorderADS object")
def test_hop_duration(self):
    """Check ``hop_size`` derived from ``hop_dur``, with ``block_dur`` and ``bd``."""
    ads = ADSFactory.ads(audio_source=self.audio_source,
                         block_dur=0.02,
                         hop_dur=0.01)  # 10 ms hop
    size = ads.hop_size
    self.assertEqual(
        size, 160, "Wrong hop_size, expected: 160, found: {0}".format(size))

    # with alias keyword for the block duration
    ads = ADSFactory.ads(audio_source=self.audio_source,
                         bd=0.025,
                         hop_dur=0.015)  # 15 ms hop
    size = ads.hop_size
    # The message used to say "block_size" although hop_size is checked.
    self.assertEqual(
        size, 240, "Wrong hop_size, expected: 240, found: {0}".format(size))
def test_Limiter_Deco_type(self):
    """Passing ``max_time`` must yield an ADSFactory.LimiterADS."""
    limited = ADSFactory.ads(audio_source=self.audio_source, max_time=1)
    template = ("wrong type for ads object, expected: "
                "'ADSFactory.LimiterADS', found: {0}")
    self.assertIsInstance(limited, ADSFactory.LimiterADS,
                          msg=template.format(type(limited)))
def test_Recorder_Deco_type(self):
    """Passing ``record=True`` must yield an ADSFactory.RecorderADS."""
    recorder = ADSFactory.ads(audio_source=self.audio_source, record=True)
    template = ("wrong type for ads object, expected: "
                "'ADSFactory.RecorderADS', found: {0}")
    self.assertIsInstance(recorder, ADSFactory.RecorderADS,
                          msg=template.format(type(recorder)))
def test_Recorder_Overlap_Deco_type(self):
    """Check that an OverlapADS wrapping a RecorderADS is built."""
    ads = ADSFactory.ads(audio_source=self.audio_source, block_size=256,
                         hop_size=128, record=True)
    self.assertIsInstance(
        ads, ADSFactory.OverlapADS,
        msg="wrong type for ads object, expected: 'ADSFactory.OverlapADS', "
            "found: {0}".format(type(ads)))

    wrapped = ads.ads
    # Report the type of the object under test; the message used to show
    # type(ads) although ads.ads is the one being checked.
    self.assertIsInstance(
        wrapped, ADSFactory.RecorderADS,
        msg="wrong type for ads object, expected: 'ADSFactory.RecorderADS', "
            "found: {0}".format(type(wrapped)))
def test_channels_alias(self):
    """Check that the ``ch`` alias sets the number of channels."""
    options = {"data_buffer": self.signal, "sampling_rate": 16,
               "sample_width": 2, "ch": 1}
    reported = ADSFactory.ads(**options).get_channels()
    message = "Wrong number of channels, expected: 1, found: {0}".format(reported)
    self.assertEqual(reported, 1, message)
def test_ADS_type(self):
    """The factory must return an ADSFactory.AudioDataSource by default."""
    source = ADSFactory.ads(audio_source=self.audio_source)
    template = ("wrong type for ads object, expected: "
                "'ADSFactory.AudioDataSource', found: {0}")
    self.assertIsInstance(source, ADSFactory.AudioDataSource,
                          msg=template.format(type(source)))
def calibrate(self):
    '''
    Calibrate the MinMaxScaler, self.scaler, on up to 10 seconds of audio.

    Audio is read from self.audioPath when it is set, otherwise from the
    source ADSFactory.ads builds when no filename is given. Each detected
    audio activity is passed to the scaler's fit_transform method, and the
    resulting data_min_ / data_max_ are stored in self.mini / self.maxi.
    See sklearn.preprocessing.MinMaxScaler for details.

    This is redundant, scaling is not necessary.
    '''
    # Only waits for the user to press return; the input itself is unused.
    raw_input(
        "Calibrate normalisation, press return then make noises from your mouth hole."
    )

    if self.audioPath is None:
        asource = ADSFactory.ads(sampling_rate=self.sr, max_time=10)
    else:
        asource = ADSFactory.ads(filename=self.audioPath,
                                 sampling_rate=self.sr,
                                 max_time=10)

    validator = AudioEnergyValidator(
        sample_width=asource.get_sample_width(),
        energy_threshold=self.energy)
    tokenizer = StreamTokenizer(validator=validator,
                                min_length=self.min_len,
                                max_length=self.max_len,
                                max_continuous_silence=self.max_con_si)

    def calib_callback(data, start, end):
        # NOTE(review): only the first frame of the token (data[0]) is used,
        # and fit_transform is called anew for every token - confirm intent.
        audio = np.fromstring(data[0], dtype=np.int8)
        self.scaler.fit_transform(np.swapaxes(np.asarray([audio]), 0, 1))
        print("Audio sample found {0}--{1}".format(start, end))

    asource.open()
    try:
        tokenizer.tokenize(asource, callback=calib_callback)
    finally:
        # The source used to be left open after calibration.
        asource.close()

    print("Scaler paramaters found: min: {0} max: {1}".format(
        self.scaler.data_min_, self.scaler.data_max_))
    print("calibration done")
    self.mini = self.scaler.data_min_
    self.maxi = self.scaler.data_max_
def getSplitAudioDurationListBetweenSilence(fileName, eachAudioLen, silencePeriod, energyThreshold=55):
    """Tokenize an audio file into regions of activity separated by silence.

    Args:
        fileName: Path of the audio file to analyse.
        eachAudioLen: Multiplied by 100 and used as the tokenizer's
            ``max_length``.
        silencePeriod: Multiplied by 100 and used as the tokenizer's
            ``max_continuous_silence``.
        energyThreshold: Energy threshold given to ``AudioEnergyValidator``.

    Returns:
        The tokens produced by ``StreamTokenizer.tokenize``, or an empty list
        when processing fails or is interrupted. Failures are reported on
        stderr instead of being raised.
    """
    # Bound up front so that the error paths below can neither hit an unbound
    # ``tokens`` at the return statement nor an unbound ``asource`` on close.
    tokens = []
    asource = None
    try:
        # ``record`` is False: the source is read once and never rewound.
        asource = ADSFactory.ads(filename=fileName, record=False)
        validator = AudioEnergyValidator(sample_width=asource.get_sample_width(),
                                         energy_threshold=energyThreshold)
        # Lengths are expressed in analysis windows; per the original note the
        # default window is 10 ms
        # (float(asource.get_block_size()) / asource.get_sampling_rate()).
        tokenizer = StreamTokenizer(validator=validator,
                                    min_length=400,
                                    max_length=eachAudioLen * 100,
                                    max_continuous_silence=silencePeriod * 100)
        asource.open()
        tokens = tokenizer.tokenize(asource)
    except KeyboardInterrupt:
        # Interruption is deliberately swallowed; whatever was collected
        # (possibly nothing) is returned.
        pass
    except Exception as e:
        sys.stderr.write(str(e) + "\n")
    finally:
        if asource is not None:
            asource.close()
    return tokens
def test_record_alias(self):
    """Check that the ``rec`` alias makes the data source rewindable."""
    options = {
        "data_buffer": self.signal,
        "sampling_rate": 16,
        "sample_width": 2,
        "channels": 1,
        "rec": True,
        "block_dur": 0.5,
    }
    source = ADSFactory.ads(**options)
    self.assertTrue(source.rewindable,
                    "AudioDataSource.rewindable expected to be True")
def test_block_size_alias(self):
    """Check that the ``bs`` alias sets the block size."""
    options = {"data_buffer": self.signal, "sampling_rate": 16,
               "sample_width": 2, "channels": 1, "bs": 8}
    reported = ADSFactory.ads(**options).get_block_size()
    template = "Wrong block_size using bs alias, expected: 8, found: {0}"
    self.assertEqual(reported, 8, template.format(reported))
def test_ADS_type(self):
    """The factory must return an AudioDataSource instance."""
    source = ADSFactory.ads(audio_source=self.audio_source)
    template = ("wrong type for ads object, expected: 'AudioDataSource', "
                "found: {0}")
    self.assertIsInstance(source, AudioDataSource,
                          template.format(type(source)))
def test_Recorder_Deco_rewind(self):
    """After a rewind, the wrapped audio source must be a BufferAudioSource."""
    ads = ADSFactory.ads(audio_source=self.audio_source, record=True,
                         block_size=320)
    ads.open()
    try:
        ads.read()
        ads.rewind()
        # The message is built by literal concatenation: the former backslash
        # continuation inside the string embedded the next line's indentation.
        self.assertIsInstance(
            ads.get_audio_source(), BufferAudioSource,
            "After rewind RecorderADS.get_audio_source should "
            "be an instance of BufferAudioSource")
    finally:
        # Close the source even when the assertion fails.
        ads.close()
def test_Overlap_Deco_type(self):
    """Passing a hop size with the block size must yield an OverlapADS."""
    # an OverlapADS is obtained if a valid hop_size is given
    options = {"block_size": 256, "hop_size": 128}
    overlapping = ADSFactory.ads(audio_source=self.audio_source, **options)
    template = ("wrong type for ads object, expected: "
                "'ADSFactory.OverlapADS', found: {0}")
    self.assertIsInstance(overlapping, ADSFactory.OverlapADS,
                          msg=template.format(type(overlapping)))
def test_block_duration_alias(self):
    """Check that the ``bd`` alias sets the block size from a duration."""
    ads = ADSFactory.ads(data_buffer=self.signal,
                         sampling_rate=16,
                         sample_width=2,
                         channels=1,
                         bd=0.75)
    # 0.75 s at a sampling rate of 16 => 0.75 * 16 = 12 samples
    size = ads.get_block_size()
    # The message used to announce 8 although 12 is the expected value.
    self.assertEqual(
        size, 12,
        "Wrong block_size set with a block_dur alias 'bd', expected: 12, found: {0}"
        .format(size))
def test_Limiter_Recorder_Overlap_Deco_rewind_and_read_limit(self):
    """Check the amount of new data read with a time limit and overlap."""
    # Use arbitrary valid block_size and hop_size
    block_size = 1000
    hop_size = 200
    ads = ADSFactory.ads(audio_source=self.audio_source,
                         max_time=1.317,
                         block_size=block_size,
                         hop_size=hop_size,
                         record=True)

    # Limiter + Overlap decos => read N block of actual data
    # one block of size block_size
    # N - 1 blocks of size hop_size
    # the total size of read data might be a slightly greater
    # than the required size calculated from max_time
    frame_bytes = ads.get_sample_width() * ads.get_channels()
    block_bytes = block_size * frame_bytes
    hop_size_bytes = hop_size * frame_bytes

    # theoretical size to reach, minus the first full block
    expected_size = int(ads.get_sampling_rate() * 1.317) * frame_bytes
    expected_size -= block_bytes

    # how much data are required to get N - 1 blocks of size hop_size
    remainder = expected_size % hop_size_bytes
    if remainder > 0:
        expected_size += hop_size_bytes - remainder
    expected_size += block_bytes

    # every block repeats (block_size - hop_size) samples of the previous one
    cache_size = (block_size - hop_size) * frame_bytes
    total_read = cache_size

    ads.open()
    for block in iter(ads.read, None):
        total_read += len(block) - cache_size
    ads.close()

    self.assertEqual(
        total_read, expected_size,
        "Wrong data length read from LimiterADS, expected: {0}, found: {1}"
        .format(expected_size, total_read))
def test_read(self):
    """The first block read must equal the first 256 samples of the file."""
    nb_samples = 256
    ads = ADSFactory.ads(audio_source=self.audio_source,
                         block_size=nb_samples)
    ads.open()
    first_block = ads.read()
    ads.close()

    reference = WaveAudioSource(
        filename=dataset.one_to_six_arabic_16000_mono_bc_noise)
    reference.open()
    expected = reference.read(nb_samples)
    reference.close()

    self.assertEqual(first_block, expected, "Unexpected data read from ads")
def test_Limiter_Recorder_Overlap_Deco_rewind_and_read_limit(self):
    """Verify how much new data is read under a time limit with overlap."""
    # Use arbitrary valid block_size and hop_size
    block_size = 1000
    hop_size = 200
    ads = ADSFactory.ads(audio_source=self.audio_source, max_time=1.317,
                         block_size=block_size, hop_size=hop_size,
                         record=True)

    # Limiter + Overlap decos => read N block of actual data
    # one block of size block_size
    # N - 1 blocks of size hop_size
    # the total size of read data might be a slightly greater
    # than the required size calculated from max_time
    bytes_per_frame = ads.get_sample_width() * ads.get_channels()
    first_block_bytes = block_size * bytes_per_frame
    hop_size_bytes = hop_size * bytes_per_frame

    # theoretical size to reach, minus block_size
    expected_size = (int(ads.get_sampling_rate() * 1.317) * bytes_per_frame
                     - first_block_bytes)

    # how much data are required to get N - 1 blocks of size hop_size
    leftover = expected_size % hop_size_bytes
    if leftover > 0:
        expected_size += hop_size_bytes - leftover
    expected_size += first_block_bytes

    cache_size = (block_size - hop_size) * bytes_per_frame

    ads.open()
    total_read = cache_size
    block = ads.read()
    while block is not None:
        total_read += len(block) - cache_size
        block = ads.read()
    ads.close()

    message = "Wrong data length read from LimiterADS, expected: {0}, found: {1}"
    self.assertEqual(total_read, expected_size,
                     message.format(expected_size, total_read))
def test_Limiter_Deco_read(self):
    """Data read under a 0.75 s limit must match the start of the file."""
    # read a maximum of 0.75 seconds from audio source
    limited = ADSFactory.ads(audio_source=self.audio_source, max_time=0.75)
    limited.open()
    # read() returns None once the limit is reached
    limited_data = b''.join(iter(limited.read, None))
    limited.close()

    reference = WaveAudioSource(
        filename=dataset.one_to_six_arabic_16000_mono_bc_noise)
    reference.open()
    expected = reference.read(int(16000 * 0.75))
    reference.close()

    self.assertEqual(limited_data, expected,
                     "Unexpected data read from LimiterADS")
def test_Limiter_Recorder_Overlap_Deco_rewind_and_read_alias(self):
    """Replay overlapping blocks after a rewind, building the source by alias.

    Every block re-read from the data source is compared with a manual read
    of ``self.signal`` positioned at the matching hop offset.
    """
    # Use arbitrary valid block_size and hop_size
    block_size = 5
    hop_size = 4
    ads = ADSFactory.ads(db=self.signal, sr=16, sw=2, ch=1, mt=0.80,
                         bs=block_size, hs=hop_size, rec=True)

    # Read all available data overlapping blocks, counting them
    ads.open()
    nb_blocks = 0
    while ads.read() is not None:
        nb_blocks += 1
    ads.rewind()

    # Build a BufferAudioSource
    audio_source = BufferAudioSource(self.signal, ads.get_sampling_rate(),
                                     ads.get_sample_width(),
                                     ads.get_channels())
    audio_source.open()

    # Compare all blocks read from OverlapADS to those read
    # from an audio source with a manual set_position
    for j in range(nb_blocks):
        tmp = audio_source.read(block_size)
        block = ads.read()
        # Report the index of the failing block; the message used to show
        # the total number of blocks for every iteration.
        self.assertEqual(
            block, tmp,
            "Unexpected block (N={0}) read from OverlapADS".format(j))
        audio_source.set_position((j + 1) * hop_size)

    ads.close()
    audio_source.close()
def test_Limiter_Recorder_Overlap_Deco_rewind_and_read(self):
    """Replay overlapping blocks after a rewind and compare with the file.

    Every block re-read from the data source is compared with a manual read
    of the wave data positioned at the matching hop offset.
    """
    # Use arbitrary valid block_size and hop_size
    block_size = 1600
    hop_size = 400
    ads = ADSFactory.ads(audio_source=self.audio_source, max_time=1.50,
                         block_size=block_size, hop_size=hop_size,
                         record=True)

    # Read all available data overlapping blocks, counting them
    ads.open()
    nb_blocks = 0
    while ads.read() is not None:
        nb_blocks += 1
    ads.rewind()

    # Read all data from file and build a BufferAudioSource
    fp = wave.open(dataset.one_to_six_arabic_16000_mono_bc_noise, "r")
    wave_data = fp.readframes(fp.getnframes())
    fp.close()
    audio_source = BufferAudioSource(wave_data, ads.get_sampling_rate(),
                                     ads.get_sample_width(),
                                     ads.get_channels())
    audio_source.open()

    # Compare all blocks read from OverlapADS to those read
    # from an audio source with a manual set_position
    for j in range(nb_blocks):
        tmp = audio_source.read(block_size)
        # Report the index of the failing block; the message used to show
        # the total number of blocks for every iteration.
        self.assertEqual(
            ads.read(), tmp,
            "Unexpected block (N={0}) read from OverlapADS".format(j))
        audio_source.set_position((j + 1) * hop_size)

    ads.close()
    audio_source.close()
def _get_asr_result_whole(folder, prefix):
    """Decode every detected audio region of a recording and save a CSV.

    ``./temp/<prefix>.wav`` is tokenized; each token is written to
    ``tmp.wav``, fed to the pocketsphinx decoder and its segments are
    collected. ``start_frame`` and ``end_frame`` of each segment are shifted
    by the number of audio blocks contained in the preceding tokens. The
    result is written to ``<folder>/<prefix>.csv``.

    Args:
        folder: Directory receiving the CSV file.
        prefix: Base name (without extension) of the input wave file and of
            the output CSV file.
    """
    shifted_fields = ('start_frame', 'end_frame')
    plain_fields = ('word', 'ascore', 'lscore', 'lback', 'prob')

    asource = ADSFactory.ads(filename='./temp/{}.wav'.format(prefix),
                             block_size=160)
    validator = AudioEnergyValidator(sample_width=asource.get_sample_width(),
                                     energy_threshold=65)
    tokenizer = StreamTokenizer(validator=validator, min_length=300,
                                max_length=1000, max_continuous_silence=50)
    asource.open()
    try:
        # Kept as a function-level import, as in the original code.
        from pocketsphinx_decoder import decoder

        tokens = tokenizer.tokenize(asource)
        d = defaultdict(list)
        past = 0
        for content, start, end in tokens:
            save_audio_data(data=b''.join(content), filename='tmp.wav',
                            filetype='wav',
                            sr=asource.get_sampling_rate(),
                            sw=asource.get_sample_width(),
                            ch=asource.get_channels())
            # Read through a context manager so the handle is not leaked.
            with open('tmp.wav', 'rb') as wav_file:
                raw = wav_file.read()

            decoder.start_utt()
            decoder.process_raw(raw, False, False)
            decoder.end_utt()
            seg = list(decoder.seg())
            print(' '.join([s.word for s in seg]))

            for name in shifted_fields:
                d[name].extend([getattr(s, name) + past for s in seg])
            for name in plain_fields:
                d[name].extend([getattr(s, name) for s in seg])
            past += len(content)
    finally:
        # The source used to be left open.
        asource.close()

    df = pd.DataFrame(d)
    df = df[['start_frame', 'end_frame', 'ascore', 'lscore', 'lback', 'prob',
             'word']]
    df.to_csv(path.join(folder, '{}.csv'.format(prefix)), index=None)
def test_Limiter_Deco_read_limit(self):
    """Check the number of bytes delivered when reading is capped at 1.191 s."""
    # read a maximum of 1.191 seconds from audio source
    ads = ADSFactory.ads(audio_source=self.audio_source, max_time=1.191)

    # The requested duration in bytes is:
    #   max_time * sampling_rate * sample_width * nb_channels
    # The expected total is that size rounded up to a whole number of
    # blocks, i.e. N blocks of size block_size where:
    #   (N - 1) * block_size < desired duration, AND
    #   N * block_size >= desired duration

    # theoretical size to reach
    expected_size = int(ads.get_sampling_rate() * 1.191) * \
        ads.get_sample_width() * ads.get_channels()

    # how much data are required to get N blocks of size block_size
    block_size_bytes = (ads.get_block_size() * ads.get_sample_width()
                        * ads.get_channels())
    r = expected_size % block_size_bytes
    if r > 0:
        expected_size += block_size_bytes - r

    total_read = 0
    ads.open()
    while True:
        block = ads.read()
        if block is None:
            break
        total_read += len(block)
    ads.close()

    self.assertEqual(
        total_read, expected_size,
        "Wrong data length read from LimiterADS, expected: {0}, found: {1}"
        .format(expected_size, total_read))
def test_sample_width(self):
    """The data source must report a sample width of 2."""
    source = ADSFactory.ads(audio_source=self.audio_source)
    expected = 2
    found = source.get_sample_width()
    self.assertEqual(
        found, expected,
        "Wrong sample width, expected: 2, found: {0}".format(found))
def test_sample_width_alias(self):
    """The ``sw`` alias must set the sample width."""
    source = ADSFactory.ads(data_buffer=self.signal, sampling_rate=16,
                            sw=2, channels=1)
    expected = 2
    found = source.get_sample_width()
    self.assertEqual(
        found, expected,
        "Wrong sample width, expected: 2, found: {0}".format(found))
def test_sampling_rate_alias(self):
    """Check that the ``sr`` alias sets the sampling rate."""
    ads = ADSFactory.ads(data_buffer=self.signal, sr=16, sample_width=2,
                         channels=1)
    srate = ads.get_sampling_rate()
    # The message used to announce 16000 although 16 is the expected value.
    self.assertEqual(srate, 16,
                     "Wrong sampling rate, expected: 16, found: {0}".format(srate))
def test_block_size_alias(self):
    """The ``bs`` alias must set the block size."""
    source = ADSFactory.ads(data_buffer=self.signal, sampling_rate=16,
                            sample_width=2, channels=1, bs=8)
    expected = 8
    found = source.get_block_size()
    self.assertEqual(
        found, expected,
        "Wrong block_size using bs alias, expected: 8, found: {0}".format(found))
def test_channels_alias(self):
    """The ``ch`` alias must set the number of channels."""
    source = ADSFactory.ads(data_buffer=self.signal, sampling_rate=16,
                            sample_width=2, ch=1)
    expected = 1
    found = source.get_channels()
    self.assertEqual(
        found, expected,
        "Wrong number of channels, expected: 1, found: {0}".format(found))
def test_filename_alias(self):
    """Check that the ``fn`` alias builds a data source from a wave file."""
    ads = ADSFactory.ads(fn=dataset.one_to_six_arabic_16000_mono_bc_noise)
    # The test used to assert nothing. The dataset entry's name advertises a
    # 16000 Hz recording - NOTE(review): confirm against the dataset file.
    srate = ads.get_sampling_rate()
    self.assertEqual(
        srate, 16000,
        "Wrong sampling rate, expected: 16000, found: {0}".format(srate))
def test_block_duration_alias(self):
    """Check that the ``bd`` alias sets the block size from a duration."""
    ads = ADSFactory.ads(data_buffer=self.signal, sampling_rate=16,
                         sample_width=2, channels=1, bd=0.75)
    # 0.75 s at a sampling rate of 16 => 0.75 * 16 = 12 samples
    size = ads.get_block_size()
    # The message used to announce 8 although 12 is the expected value.
    self.assertEqual(
        size, 12,
        "Wrong block_size set with a block_dur alias 'bd', expected: 12, found: {0}".format(size))
def test_max_time_alias(self):
    """The ``mt`` alias must produce an ADSFactory.LimiterADS."""
    source = ADSFactory.ads(data_buffer=self.signal, sampling_rate=16,
                            sample_width=2, channels=1, mt=10)
    failure_message = "ads expected to an ADSFactory.LimiterADS object"
    self.assertIsInstance(source, ADSFactory.LimiterADS, failure_message)
def test_sampling_rate(self):
    """The data source must report a sampling rate of 16000."""
    source = ADSFactory.ads(audio_source=self.audio_source)
    expected = 16000
    found = source.get_sampling_rate()
    self.assertEqual(
        found, expected,
        "Wrong sampling rate, expected: 16000, found: {0}".format(found))
def test_record_alias(self):
    """The ``rec`` alias must produce an ADSFactory.RecorderADS."""
    source = ADSFactory.ads(data_buffer=self.signal, sampling_rate=16,
                            sample_width=2, channels=1, rec=True)
    failure_message = "ads expected to an ADSFactory.RecorderADS object"
    self.assertIsInstance(source, ADSFactory.RecorderADS, failure_message)
def test_channels(self):
    """The data source must report a single channel."""
    source = ADSFactory.ads(audio_source=self.audio_source)
    expected = 1
    found = source.get_channels()
    self.assertEqual(
        found, expected,
        "Wrong number of channels, expected: 1, found: {0}".format(found))
import sys try: energy_threshold = 45 duration = 10 # seconds if len(sys.argv) > 1: energy_threshold = float(sys.argv[1]) if len(sys.argv) > 2: duration = float(sys.argv[2]) # record = True so that we'll be able to rewind the source. # max_time = 10: read 10 seconds from the microphone asource = ADSFactory.ads(record=True, max_time=duration) validator = AudioEnergyValidator(sample_width=asource.get_sample_width(), energy_threshold=energy_threshold) tokenizer = StreamTokenizer(validator=validator, min_length=20, max_length=250, max_continuous_silence=30) player = player_for(asource) def echo(data, start, end): print("Acoustic activity at: {0}--{1}".format(start, end)) player.play(b"".join(data)) asource.open() print("\n ** Make some noise (dur:{}, energy:{})...".format(duration, energy_threshold)) tokenizer.tokenize(asource, callback=echo)
""" @author: Amine SEHILI <*****@*****.**> September, 2015 """ from auditok import ADSFactory, AudioEnergyValidator, StreamTokenizer, player_for, dataset import sys try: # We set the `record` argument to True so that we can rewind the source asource = ADSFactory.ads(filename=dataset.one_to_six_arabic_16000_mono_bc_noise, record=True) validator = AudioEnergyValidator(sample_width=asource.get_sample_width(), energy_threshold=65) # Defalut analysis window is 10 ms (float(asource.get_block_size()) / asource.get_sampling_rate()) # min_length=20 : minimum length of a valid audio activity is 20 * 10 == 200 ms # max_length=400 : maximum length of a valid audio activity is 400 * 10 == 4000 ms == 4 seconds # max_continuous_silence=30 : maximum length of a tolerated silence within a valid audio activity is 30 * 30 == 300 ms tokenizer = StreamTokenizer(validator=validator, min_length=20, max_length=400, max_continuous_silence=30) asource.open() tokens = tokenizer.tokenize(asource) # Play detected regions back player = player_for(asource) # Rewind and read the whole signal asource.rewind() original_signal = []