def split_audio_file(source_audio_file, source_transcript, target_directory):
    """
    Split an audio file according to a transcript.

    Wraps `source_audio_file` in an `audio_file` object and
    `source_transcript` in a `time_aligned_text` object, then asks the audio
    object to split itself using that transcript, passing `target_directory`
    through unchanged.

    Nothing is returned; whatever `split` returns is discarded.
    """
    # The audio object is built before the transcript object, as before.
    audio = audio_file(source_audio_file)
    aligned_text = time_aligned_text(source_transcript)

    audio.split(aligned_text, target_directory)
def __init__(self, *args, **kwargs):
    """
    Populate attributes from dictionaries / keyword arguments and, when no
    exemplars were supplied, discover audio + STM pairs under `self.location`.

    Every positional argument that is a dict has its items copied onto the
    instance as attributes; positional arguments of any other type are
    ignored. Keyword arguments are applied afterwards, so they win when a
    name appears in both.

    If `self.exemplars` is still falsy after that, two searches are run for
    each of the extensions "mp3", "wav" and "sph" (in that order):

    1. audio files found by `get_files(self.location, ext)` that have a
       transcript at the same path with the extension replaced by ".stm";
    2. audio files found by `get_files(self.location + "/sph/", ext)` whose
       transcript is `self.location + "/stm/" + <basename> + ".stm"`.

    Only pairs whose transcript path exists on disk are kept. When
    `self.location` is falsy both searches yield nothing and
    `self.exemplars` ends up as an empty list.
    """
    for positional in args:
        if not isinstance(positional, dict):
            continue
        for attr_name, attr_value in positional.items():
            setattr(self, attr_name, attr_value)

    for attr_name, attr_value in kwargs.items():
        setattr(self, attr_name, attr_value)

    # Exemplars handed in above take precedence over anything on disk.
    if self.exemplars:
        return

    # Give this instance its own list instead of sharing the value found on
    # lookup (the original comment refers to a static class variable).
    self.exemplars = []

    # Same order as reversing ["sph", "wav", "mp3"].
    audio_extensions_to_try = ["mp3", "wav", "sph"]

    # Pass 1: transcript sits next to the audio file, same stem.
    side_by_side = []
    for audio_extension in audio_extensions_to_try:
        if not self.location:
            continue
        for fl in get_files(self.location, audio_extension):
            if not os.path.exists(strip_extension(fl) + ".stm"):
                continue
            side_by_side.append(
                exemplar({
                    "audio_file": audio_file(fl),
                    "transcript_file": time_aligned_text(
                        strip_extension(fl) + ".stm"
                    ),
                })
            )
    self.exemplars += side_by_side

    # Pass 2: audio under <location>/sph/, transcripts under <location>/stm/.
    split_layout = []
    for audio_extension in audio_extensions_to_try:
        if not self.location:
            continue
        for fl in get_files(self.location + "/sph/", audio_extension):
            stm_path = (
                self.location + "/stm/" + basename(strip_extension(fl)) + ".stm"
            )
            if not os.path.exists(stm_path):
                continue
            split_layout.append(
                exemplar({
                    "audio_file": audio_file(fl),
                    "transcript_file": time_aligned_text(
                        self.location
                        + "/stm/"
                        + basename(strip_extension(fl))
                        + ".stm"
                    ),
                })
            )
    self.exemplars += split_layout
def split_audio_file(source_audio_file, source_transcript, target_directory):
    """
    Cut the source audio file into the segments described by the transcript
    file, directing the output at `target_directory`.

    `source_audio_file` is wrapped in an `audio_file` object and
    `source_transcript` in a `time_aligned_text` object; the actual work is
    delegated to the audio object's `split` method. According to the
    original docstring this produces stm and sph files in the target
    directory.

    Returns nothing.
    """
    # Construction order (audio first, transcript second) is kept as-is.
    recording = audio_file(source_audio_file)
    segment_listing = time_aligned_text(source_transcript)

    recording.split(segment_listing, target_directory)
def __init__(self, input_dict=None):
    """
    Populate the instance from `input_dict` and, when no exemplars were
    supplied, pair up the SPH and STM files found under `self.location`.

    A truthy `input_dict` is merged straight into the instance `__dict__`.
    If `self.exemplars` is falsy afterwards, the "sph" and "stm" files
    reported by `get_files` for `self.location` are each sorted, wrapped in
    `audio_file` / `time_aligned_text` objects, and zipped together into
    `exemplar` objects.
    """
    if input_dict:
        self.__dict__.update(input_dict)

    # Exemplars provided by the caller are left untouched.
    if self.exemplars:
        return

    # All audio objects are created before any transcript is listed or
    # loaded, matching the original evaluation order.
    audio_objects = list(map(audio_file, sorted(get_files(self.location, "sph"))))
    transcript_objects = list(
        map(time_aligned_text, sorted(get_files(self.location, "stm")))
    )

    # NOTE(review): pairing is purely positional -- the i-th sorted sph path
    # is matched with the i-th sorted stm path, and zip silently drops the
    # surplus entries of the longer list. Confirm every audio file has a
    # matching transcript, otherwise pairs can be misaligned.
    paired = []
    for audio, transcript in zip(audio_objects, transcript_objects):
        paired.append(exemplar({"audio_file": audio, "transcript_file": transcript}))
    self.exemplars = paired
def check_audio_file(audio_file_name):
    """
    Return an `audio_file` object for `audio_file_name`, or log an error and
    terminate the process.

    The name is checked with `valid_input_file` against the extensions
    "mp3", "sph", "wav", "au" and "raw". When that check fails, an error is
    logged through `LOGGER` and `sys.exit(1)` is called, so the function
    does not return in that case.
    """
    accepted_extensions = ["mp3", "sph", "wav", "au", "raw"]

    # Guard clause: reject first, so the success path is the fall-through.
    if not valid_input_file(audio_file_name, accepted_extensions):
        LOGGER.error("Invalid audio file {}".format(audio_file_name))
        sys.exit(1)

    return audio_file(audio_file_name)