def get_wavs(self, wav_folder):
    """Populate self.phones with one Audio object per monophone recording.

    Walks wav_folder, loading every .wav file into an SA.Audio keyed by the
    file's base name.  Afterwards adds two synthesised silent entries, "sp"
    (short pause) and "lp" (long pause), whose lengths come from
    self.sp_time / self.lp_time (converted to samples by
    self.sample_converter) -- used for punctuation pauses.

    Args:
        wav_folder (str): folder containing the monophone .wav recordings.
    """
    for root, dirs, files in os.walk(wav_folder, topdown=False):
        for file in files:
            name, ext = os.path.splitext(file)
            if ext.lower() != '.wav':
                continue  # skip .DS_Store and any other non-audio files
            self.phones[name] = SA.Audio()  # one Audio object per phone name
            # join against root (not wav_folder): os.walk recurses into
            # subdirectories, so root is the directory that contains `file`.
            self.phones[name].load(os.path.join(root, file))
    # Silent pause entries (salience for punctuation).
    # Reference: add echo method in SimpleAudio.
    for key, duration in (("sp", self.sp_time), ("lp", self.lp_time)):
        pause = SA.Audio(rate=self.rate)
        pause.data = np.zeros(self.sample_converter(duration), pause.nptype)
        self.phones[key] = pause
def get_wavs(self, wav_folder):
    """
    Reads and stores wave files from a given folder

    Looks in a dictionary and checks for each .wav file.  Creates an Audio
    object, loaded from the given wav file.  Each Audio is then added to a
    dictionary to be returned.

    Args:
        wav_folder (str): The filepath to read from.

    Returns:
        wavs: A dictionary of all waves as Audio objects mapped to a string
            key based on their filename, in format {filename: Audio}
    """
    type_check([(wav_folder, str, False)])
    wavs = {}
    for root, dirs, files in os.walk(wav_folder, topdown=False):
        for file in files:
            name, ext = os.path.splitext(file)
            if ext.lower() == '.wav':
                audio = SA.Audio()
                # os.path.join (not '/' concatenation) for portability, and
                # root (not wav_folder) so files in subdirectories resolve.
                audio.load(os.path.join(root, file))
                wavs[name] = audio
    return wavs
def __init__(self, wav_folder):
    """Build the synthesiser: output buffer, phone bank, pause entries, pronunciations.

    Args:
        wav_folder: folder containing the monophone .wav recordings.
    """
    # Blank 16 kHz Audio buffer that the synthesised waveform is written into.
    self.out = SA.Audio(rate=16000)
    # One Audio object per monophone, keyed as returned by get_wavs().
    self.phones = self.get_wavs(wav_folder)
    # Silent entries for punctuation-driven pauses (lengths in ms).
    for break_name, break_ms in (('comma - break', 250), ('sentence - break', 500)):
        self.add_phone_break(break_name, break_ms)
    # CMU pronouncing dictionary: word -> list of candidate phone sequences.
    self.word_phones_dict = cmudict.dict()
def get_wavs(self, wav_folder):
    """For each wav file in wav_folder, create entry in phones dict with
    filename as key (extension included, e.g. 'aa.wav').

    For each dict entry, instantiate an instance of SA.Audio and pass the
    monophone file to the load() method of that class.

    Args:
        wav_folder: folder containing monophone recordings

    Returns:
        dict of phone filenames and audio instances
    """
    for root, dirs, files in os.walk(wav_folder, topdown=False):
        for file in files:
            # Only load real wave files -- the old '.DS_Store'-only check
            # let any other stray file crash the Audio loader.
            if not file.lower().endswith('.wav'):
                continue
            self.phones[file] = SA.Audio()
            # os.path.join instead of './%s/%s' string formatting: portable
            # and correct for absolute wav_folder paths too.
            self.phones[file].load(os.path.join(wav_folder, file))
    return self.phones
def add_phone_break(self, name, length, frequency=16000):
    """
    Creates an Audio object representing a pause of a given length, adding it
    as a dictionary element to the phones.

    Calculates the sample count for a millisecond, creates an Audio object and
    fills it with a numpy array of zeros.  The number of zeros is the
    length * per-ms sample count.  The Audio object is then added to the
    phones dictionary.

    Args:
        name (str): A string name to be the dictionary index
        length (int): The length of the break in ms
        frequency (int - optional): Optional integer value for frequency,
            defaults to 16000
    """
    type_check([(name, str, False), (length, int, False), (frequency, int, False)])
    # Floor division: plain '/' becomes true division under Python 3 (or
    # `from __future__ import division`), and np.zeros rejects a float count.
    samples_per_ms = frequency // 1000
    audio = SA.Audio()
    audio.data = np.zeros(length * samples_per_ms, np.int16)
    self.phones[name] = audio
def get_wavs(self, wav_folder):
    """
    Loads each file in 'wav_folder' and adds the audio data to the dictionary
    'self.phones'.  The filename is set to be the dictionary key (note:
    ".wav" is removed from the filename) and the sampled audio data is set to
    be the dictionary value.

    :param wav_folder: folder containing wave files of monophones used in
        concatenation.
    :return: 'self.phones' dictionary (None if the folder is missing)
    """
    try:
        # os.walk silently yields nothing for a missing folder, so probe
        # explicitly -- the old `except KeyError` was dead code: neither
        # os.walk nor Audio.load ever raises KeyError.
        if not os.path.isdir(wav_folder):
            raise IOError(wav_folder)
        for root, dirs, files in os.walk(wav_folder, topdown=False):
            for file in files:
                out = SA.Audio()
                out.load(os.path.join(wav_folder, file))
                # Remove '.wav' from filename
                key = file.replace('.wav', '', 1)
                self.phones[key] = out.data
        return self.phones
    except (IOError, OSError):
        print('The monophones folder could not be located. Make sure you have added it to the '
              'Python directory.')
def make_audio_out_array(phone_seq):
    """For each monophone in the utterance phone sequence, append to a list
    the data for that monophone (from the synth's phones dict), then stack
    into one numpy array with hstack(), ready for playback.

    'sil' entries map to 2500 samples of generated silence.

    Args:
        phone_seq: sequence of phone names created by get_phone_seq

    Returns:
        audio_out_array: 1-D numpy array of concatenated samples
        (exits the process if phone_seq is empty or contains an unknown phone)
    """
    silence = SA.Audio()
    silence.create_noise(2500, 0)
    audio_out_list = []
    try:
        for phone in phone_seq:
            if phone == 'sil':
                audio_out_list.append(silence.data)
            else:
                # KeyError here means an unknown phone name.
                audio_out_list.append(S.phones[str(phone) + '.wav'].data)
        # hstack must be INSIDE the try: an empty phone_seq raises ValueError
        # here, which the old code let escape despite its error message.
        # Narrow exceptions replace the old bare `except:`, which also
        # swallowed KeyboardInterrupt/SystemExit.
        audio_out_array = np.hstack(audio_out_list)
    except (KeyError, ValueError):
        print('Empty phone sequence cannot be synthesised.')
        sys.exit()
    return audio_out_array
# NOTE(review): fragment -- the enclosing function's `def`/`try:` lines sit
# above this view, and the __main__ block is cut off mid-statement at the
# final `if`.  Indentation below is reconstructed -- confirm against the
# original file.
            # Strip the CMUdict stress digit (e.g. 'AH0' -> 'AH'); first match only.
            m = re.sub(r'\d', '', key, 1)
            phones.append(m)
        return phones
    except KeyError:
        # presumably guards a cmudict lookup above this view -- TODO confirm
        print 'Your phrase could not be tokenized. Please try another word.'
        sys.exit()


if __name__ == "__main__":
    S = Synth(wav_folder=args.monophones)

    # Create object for 'Audio' class in SimpleAudio.py module
    # Modify 'out.data' to produce the correct synthesis
    out = SA.Audio(rate=16000)
    print out.data, type(out.data)

    phone_seq = get_phone_seq(args.phrase[0])
    print phone_seq

    # Append the stored samples for every phone that has a matching key.
    for phone in phone_seq:
        for key in S.phones:
            if phone == key:
                out.data = np.append(out.data, S.phones[key])

        # Note that 'time_to_samples' appears to be malfunctional. The correct value the
        # times should be 0.25 and 0.50. These shorter values were chosen to more closely
        # matched the desired lengths of 250 ms and 500 ms.
        if phone == '#':
# NOTE(review): fragment -- the enclosing method's `def` line is above this
# view; indentation for the method tail is reconstructed.
        # Build the spoken form of a date string, e.g. "the first of May ...".
        date_object = self.date_str_to_object(date_str)
        date_in_words = self.date_to_words(date_object.day)
        month_in_words = self.month_to_words(date_object.month)
        if len(date_str.split("/")) == 2:
            # only date and month
            return "the {} of {}".format(date_in_words, month_in_words)
        else:
            # date, month, year
            year_in_words = self.year_to_words(date_object.year)
            return "the {} of {} {}".format(date_in_words, month_in_words, year_in_words)


if __name__ == "__main__":
    # Synthesis entry point: build the synth, concatenate the phone sequence,
    # then apply optional volume scaling, playback and file output.
    syn_rate = 16000
    S = Synth(wav_folder=args.monophones, rate=syn_rate)
    out = SA.Audio(rate=syn_rate)  # blank output buffer at the synthesis rate
    # print out.data, type(out.data)  # for testing

    phone_seq = get_phone_seq(args.phrase[0])
    out.data = S.concatenate(phone_seq)

    # data modification
    if args.volume is not None:
        # ValueError will be handled by SA
        out.rescale(args.volume)
        print "synthesised audio is rescaled by a factor of %.4f" % args.volume

    # output of the modified audio
    if args.play:
        out.play()
    if args.outfile is not None:
        out.save(args.outfile)