def VAD(folder, new_folder):
    """Run voice-activity detection over every file in *folder*.

    Each file is split into non-silent regions with ``split`` (auditok);
    every region is played aloud (with a progress bar) and then saved
    into *new_folder* under the source file's name.

    Parameters
    ----------
    folder : str
        Directory containing the input audio files.
    new_folder : str
        Directory where detected regions are written.
    """
    for filename in os.listdir(folder):
        # os.path.join instead of manual '/' concatenation: portable and
        # robust against trailing separators in `folder`.
        audio_regions = split(os.path.join(folder, filename))
        for region in audio_regions:
            region.play(progress_bar=True)
            newname = region.save(os.path.join(new_folder, filename))
            print("region saved as: {}".format(newname))
def get_regions(url, dirpath, min_duration, max_duration):
    """Download the audio track of a YouTube video and split it into regions.

    Downloads the first ``audio/webm`` stream of *url* into *dirpath*
    (as ``TEMP_FILENAME``), runs ``split`` on it with the given duration
    bounds, and returns the regions immediately for that first stream.

    Parameters
    ----------
    url : str
        YouTube video URL.
    dirpath : str
        Directory the temporary ``.webm`` file is downloaded into.
    min_duration, max_duration : float
        Minimum / maximum duration (seconds) of a valid audio region.

    Returns
    -------
    dict | tuple
        ``{"regions": [...]}`` on success, or an
        ``("No available webm audios", 404)`` error tuple when the video
        exposes no ``audio/webm`` stream.
    """
    yt = YouTube(url)
    for stream in yt.streams.filter(mime_type="audio/webm"):
        stream.download(dirpath, TEMP_FILENAME)
        regions = split(
            os.path.join(dirpath, TEMP_FILENAME + ".webm"),
            min_duration,
            max_duration,
        )
        # Only the first matching stream is processed; we return from
        # inside the loop on purpose.
        return {"regions": list(regions)}
    # Fixed typo in the error message ("avaiable" -> "available").
    return ("No available webm audios", 404)
def get_noise_duration(filename: str) -> float:
    """Return the total duration (seconds) of detected audio events in a file.

    Splits *filename* with auditok using empirical thresholds and sums
    the length of every detected region.

    Parameters
    ----------
    filename : str
        Path to the audio file to analyse.

    Returns
    -------
    float
        Combined duration of all non-silent regions; 0.0 when none found.
    """
    audio_regions = auditok.split(
        filename,
        min_dur=0.2,            # minimum duration of a valid audio event in seconds
        max_dur=4,              # maximum duration of an event
        max_silence=0.3,        # max tolerated continuous silence within an event
        energy_threshold=55,    # threshold of detection
    )
    # sum() over a generator replaces the manual accumulator loop
    # (the enumerate index was unused).
    return sum(r.meta.end - r.meta.start for r in audio_regions)
def find_regions(fname, min_dur=.2, max_dur=2, max_silence=.3, energy_threshold=55, analysis_window=.01, drop_trailing_silence=True):
    '''
    Uses empirical values to calculate regions of when the participant
    started speaking and when he/she finished.

    Input:
    ------
    fname (string): full path to .wav recording

    Output:
    -------
    result (list of 3 element tuples): 0 - region id, 1 - region start, 2 - region end
    *.jpg (image): saves an image with the region identified in the same
        folder with the .wav recording
    picName (string): the name of the image (same as fname with .jpg extension)
    '''
    kwargs = {
        'min_dur': min_dur,
        'max_dur': max_dur,
        'max_silence': max_silence,
        'energy_threshold': energy_threshold,
        'analysis_window': analysis_window,
        'drop_trailing_silence': drop_trailing_silence,
    }
    audio_regions = auditok.split(fname, **kwargs)
    result = [(i, r.meta.start, r.meta.end)
              for i, r in enumerate(audio_regions)]

    # Plot
    region = auditok.load(fname)
    # BUG FIX: fname.rstrip('.wav') strips *characters* from the set
    # {'.', 'w', 'a', 'v'}, mangling names like "wave.wav" -> "w".
    # os.path.splitext removes exactly the extension, whatever it is.
    picName = os.path.splitext(fname)[0] + '.jpg'
    regions = region.splitp(show=False, save_as=picName, **kwargs)
    # Prints a RuntimeWarning: More than ... figures have been opened
    # Haven't found a way to bypass it
    plt.close('all')
    del regions
    return result, picName
def on_data_received(self, data):
    """Accumulate decoded RTP audio and flush it once speech is detected.

    Decodes the u-law payload of the incoming RTP packet to 16-bit linear
    PCM, appends it to the running buffer, and splits the buffer with
    auditok. When more than one audio event is present, the whole buffer
    is pushed onto ``self.queue_audio`` and the buffer is reset.
    """
    pcm_chunk = audioop.ulaw2lin(RTP(data).load, 2)  # data.decode()
    self._global_data += pcm_chunk

    detected = auditok.split(
        self._global_data,
        audio_format='bytes',
        sampling_rate=8000,
        sample_width=2,
        channels=1,
        min_dur=0.3,            # minimum duration of a valid audio event in seconds
        max_dur=6,              # maximum duration of an event
        max_silence=0.3,        # max tolerated continuous silence within an event
        energy_threshold=50,    # threshold of detection
    )
    regions = list(detected)
    if len(regions) > 1:
        self.queue_audio.put(self._global_data)
        self._global_data = b''
def check_test_regions(search_uuid: str):
    """
    Check whether non-silence regions have been derived for a given file.
    If no such regions exist, then derive them and insert the corresponding
    time spans into the test_regions table.

    Parameters
    ----------
    search_uuid : str
        Identifier of the search whose files are checked; passed through
        unchanged as the return value for the next task in the chain.

    Returns
    -------
    str
        The same *search_uuid*, for task chaining.
    """
    unregioned_files = crud.get_unregioned_files(db, search_uuid)
    for file_uuid in unregioned_files:
        tmp_path = f"/tmp/{file_uuid}.wav"
        # Fetch file from s3 bucket if not already in /tmp folder
        if not os.path.isfile(tmp_path):
            s3.fget_object("audio-wav", f"{file_uuid}.wav", tmp_path)
        test_regions = auditok.split(tmp_path, energy_threshold=40)
        for r in test_regions:
            # Store start/end rounded to milliseconds.
            crud.create_test_region(db, UUID(file_uuid),
                                    round(r.meta.start, 3),
                                    round(r.meta.end, 3))
    # Fixed typo in both log messages ("activitiy" -> "activity").
    if not unregioned_files:
        logger.info(
            "Audio activity regions already derived for all test file(s).")
    else:
        derived_regions = ", ".join(unregioned_files)
        logger.info(
            f"Audio activity regions derived for test file(s): {derived_regions}"
        )
    # Pass search_uuid to next task in chain
    return search_uuid