def get_wavs(self, wav_folder):
    """Load every phone recording under ``wav_folder`` into ``self.phones``.

    Each file found by ``os.walk`` is loaded into its own ``SA.Audio``
    object and stored under the part of its filename before the first
    ``'.'``.  Two silent entries, ``"sp"`` (short pause) and ``"lp"``
    (long pause), are then added for punctuation.

    Args:
        wav_folder: Path of the folder holding the phone recordings.
    """
    for root, _dirs, files in os.walk(wav_folder, topdown=False):
        for filename in files:
            phone_name = filename.split('.')[0]
            phone = SA.Audio()
            # Join with ``root`` (the directory currently being walked), not
            # ``wav_folder``: os.walk also visits subdirectories, and files
            # found there do not live directly under ``wav_folder``.
            phone.load(os.path.join(root, filename))
            self.phones[phone_name] = phone

    # Pauses are zero-filled buffers at the synthesiser's rate.  Their length
    # comes from self.sample_converter(<pause time>) -- presumably a
    # duration-to-sample-count conversion; confirm against its definition.
    short_pause = SA.Audio(rate=self.rate)
    short_pause.data = np.zeros(self.sample_converter(self.sp_time),
                                short_pause.nptype)
    self.phones["sp"] = short_pause

    long_pause = SA.Audio(rate=self.rate)
    long_pause.data = np.zeros(self.sample_converter(self.lp_time),
                               long_pause.nptype)
    self.phones["lp"] = long_pause
Пример #2
0
    def get_wavs(self, wav_folder):
        """ Reads and stores wave files from a given folder

        Walks the given directory and checks each file for a .wav extension
        (case-insensitive). Creates an Audio object, loaded from each wav file
        found. Each Audio is then added to a dictionary to be returned.

        Args:
            wav_folder (str): The filepath to read from.

        Returns:
            wavs: A dictionary of all waves as Audio objects mapped to a string key based on their filename
                  (without extension), in format {filename: Audio}
        """

        type_check([(wav_folder, str, False)])

        wavs = {}

        for root, _dirs, files in os.walk(wav_folder, topdown=False):
            for filename in files:
                name, ext = os.path.splitext(filename)
                if ext.lower() != '.wav':
                    continue
                audio = SA.Audio()
                # Build the path from `root`, the directory os.walk is
                # currently in; files inside subdirectories are not located
                # directly under `wav_folder`.
                audio.load(os.path.join(root, filename))
                wavs[name] = audio
        return wavs
Пример #3
0
    def __init__(self, wav_folder):
        """Set up the output buffer, the phone inventory and the pronunciation dictionary.

        Args:
            wav_folder: Folder passed on to ``get_wavs`` to load the phone recordings.
        """
        # Blank output audio created with rate=16000.
        self.out = SA.Audio(rate=16000)

        # Phone inventory as produced by get_wavs; the pause entries below are
        # added to it afterwards, so this assignment has to come first.
        self.phones = self.get_wavs(wav_folder)

        # (name, length) pairs handed to add_phone_break, in this order.
        pause_specs = (
            ('comma - break', 250),
            ('sentence - break', 500),
        )
        for pause_name, pause_length in pause_specs:
            self.add_phone_break(pause_name, pause_length)

        self.word_phones_dict = cmudict.dict()
Пример #4
0
 def get_wavs(self, wav_folder):
     """For each file in wav_folder, create an entry in the phones dict with the filename as key.

     Every file except '.DS_Store' gets its own SA.Audio instance, which is
     loaded from that file and stored in self.phones under the full filename
     (extension included).

     Args:
         wav_folder: folder containing monophone recordings
     Returns:
         dict of phone filenames and audio instances (self.phones)
     """
     for root, _dirs, files in os.walk(wav_folder, topdown=False):
         for filename in files:
             if filename == '.DS_Store':
                 continue
             audio = SA.Audio()
             # os.path.join(root, ...) works for absolute and relative
             # wav_folder alike and for files inside subdirectories; the
             # previous './<wav_folder>/<file>' form broke on absolute paths.
             audio.load(os.path.join(root, filename))
             self.phones[filename] = audio
     return self.phones
Пример #5
0
    def add_phone_break(self, name, length, frequency=16000):
        """ Creates an Audio object representing a pause of a given length, adding it as a dictionary element to the phones.

        Calculates the number of samples covering `length` milliseconds at the given frequency.
        Creates an Audio object, and fills it with a numpy array of that many int16 zeros.
        The Audio object is then added to the phones dictionary under `name`.

        Args:
            name (str): A string name to be the dictionary index
            length (int): The length of the break in ms
            frequency (int - optional) : Optional integer value for frequency, defaults to 16000
        """
        type_check([(name, str, False), (length, int, False), (frequency, int, False)])

        # Multiply before dividing and use floor division: this keeps the
        # count an integer (np.zeros rejects floats) and avoids the
        # truncation of `frequency / 1000` for rates such as 22050 or 44100.
        num_samples = (length * frequency) // 1000

        audio = SA.Audio()
        audio.data = np.zeros(num_samples, np.int16)

        self.phones[name] = audio
Пример #6
0
    def get_wavs(self, wav_folder):
        """
        Loads each '.wav' file under 'wav_folder' and adds the audio data to the dictionary
        'self.phones'. The filename without its extension is the dictionary key and the sampled
        audio data ('data' attribute of the loaded SA.Audio) is the dictionary value.

        If 'wav_folder' is not an existing directory, a message is printed and 'self.phones' is
        returned unchanged.

        :param wav_folder: folder containing wave files of monophones used in concatenation.
        :return: 'self.phones' dictionary
        """
        # os.walk silently yields nothing for a missing directory, so the
        # folder has to be checked explicitly for the message to be shown.
        if not os.path.isdir(wav_folder):
            print('The monophones folder could not be located. Make sure you have added it to the '
                  'Python directory.')
            return self.phones

        for root, _dirs, files in os.walk(wav_folder, topdown=False):
            for filename in files:
                key, ext = os.path.splitext(filename)
                # Skip anything that is not a wave file (e.g. '.DS_Store').
                if ext.lower() != '.wav':
                    continue
                audio = SA.Audio()
                # `root` is the directory currently being walked, which may
                # be a subdirectory of `wav_folder`.
                audio.load(os.path.join(root, filename))
                self.phones[key] = audio.data
        return self.phones
Пример #7
0
def make_audio_out_array(phone_seq):
    """Concatenate the audio data of every phone in phone_seq into one array.

    'sil' entries are rendered from a locally created SA.Audio filled via
    create_noise(2500, 0); every other phone is looked up in S.phones under
    '<phone>.wav'.  The pieces are joined with numpy's hstack(), ready for
    playback.

    Args:
        phone_seq: phone sequence created by get_phone_seq
    Returns:
        audio_out_array: the concatenated sample data
    Exits:
        Prints a message and calls sys.exit() if phone_seq is empty or a
        phone has no entry in S.phones.
    """
    # An empty sequence would only fail later inside np.hstack, so it is
    # reported up front with the message intended for this case.
    if not phone_seq:
        print('Empty phone sequence cannot be synthesised.')
        sys.exit()

    silence = SA.Audio()
    silence.create_noise(2500, 0)

    audio_out_list = []
    for phone in phone_seq:
        if phone == 'sil':
            audio_out_list.append(silence.data)
            continue
        filename = str(phone) + '.wav'
        try:
            audio_out_list.append(S.phones[filename].data)
        except KeyError:
            # Report the real cause instead of claiming the sequence is empty.
            print('No recording found for phone %r; cannot synthesise.' % filename)
            sys.exit()

    # hstack takes the list of 1-D arrays directly; wrapping unequal-length
    # arrays in np.array first is unnecessary and fails on recent NumPy.
    return np.hstack(audio_out_list)
Пример #8
0
            m = re.sub(r'\d', '', key, 1)
            phones.append(m)

        return phones

    except KeyError:
        print 'Your phrase could not be tokenized. Please try another word.'
        sys.exit()


if __name__ == "__main__":
    S = Synth(wav_folder=args.monophones)

    # Create object for 'Audio' class in SimpleAudio.py module
    # Modify 'out.data' to produce the correct synthesis
    out = SA.Audio(rate=16000)
    print out.data, type(out.data)

    phone_seq = get_phone_seq(args.phrase[0])

    print phone_seq

    for phone in phone_seq:
        for key in S.phones:
            if phone == key:
                out.data = np.append(out.data, S.phones[key])

            # Note that 'time_to_samples' appears to be malfunctioning. The correct values for
            # the times should be 0.25 and 0.50. The shorter values used here were chosen to
            # more closely match the desired lengths of 250 ms and 500 ms.
            if phone == '#':
        date_object = self.date_str_to_object(date_str)
        date_in_words = self.date_to_words(date_object.day)
        month_in_words = self.month_to_words(date_object.month)
        if len(date_str.split("/")) == 2:  # only date and month
            return "the {} of {}".format(date_in_words, month_in_words)
        else:  # date, month, year
            year_in_words = self.year_to_words(date_object.year)
            return "the {} of {} {}".format(date_in_words, month_in_words,
                                            year_in_words)


if __name__ == "__main__":
    # One sampling rate is shared by the synthesiser and the output buffer.
    syn_rate = 16000
    S = Synth(wav_folder=args.monophones, rate=syn_rate)
    out = SA.Audio(rate=syn_rate)

    # Convert the first phrase argument to phones and join their audio.
    phone_seq = get_phone_seq(args.phrase[0])
    out.data = S.concatenate(phone_seq)

    # Optional volume change (per the original note, a ValueError from an
    # invalid factor is expected to be handled inside SA).
    if args.volume is not None:
        out.rescale(args.volume)
        print("synthesised audio is rescaled by a factor of %.4f" % args.volume)

    # Emit the result: play it and/or write it to the requested file.
    if args.play:
        out.play()
    if args.outfile is not None:
        out.save(args.outfile)