def do_it(voicefile, text):
    """
    Synthesize ``text`` with the Speect voice loaded from ``voicefile``
    and print the ``hts_label`` feature of every item
    of the resulting utterance's "Segment" relation.

    :param voicefile: path of the voice file passed to ``speect.SVoice``
    :param text: the text passed to ``voice.synth``
    """
    voice = speect.SVoice(voicefile)
    utt = voice.synth(text)
    segment_relation = utt.relation_get("Segment")
    for item in segment_relation:
        # NOTE parenthesized single-argument form: it prints the same value
        #      both as a Python 2 statement and as a Python 3 function call
        print(item["hts_label"])
Exemplo n.º 2
0
    def _synthesize_single_helper(self,
                                  text,
                                  voice_code,
                                  output_file_path=None):
        """
        This is a helper function to synthesize a single text fragment via a Python call.

        The caller can choose whether the output file should be written to disk or not.

        :param text: the text to be synthesized
        :param voice_code: ignored in this example (see the NOTE below)
        :param output_file_path: if not ``None``, the synthesized audio
                                 is also saved (RIFF) to this path
        :rtype: tuple (duration, sample_rate, encoding, data)
        """
        #
        # NOTE in this example, we assume that the Speect voice data files
        #      are located in the same directory of this .py source file
        #      and that the voice JSON file is called "voice.json"
        #
        # NOTE the voice_code value is ignored in this example,
        #      but in general one might select a voice file to load,
        #      depending on voice_code
        #
        voice_json_path = gf.safe_str(gf.absolute_path("voice.json", __file__))
        voice = speect.SVoice(voice_json_path)
        utt = voice.synth(text)
        audio = utt.features["audio"]
        if output_file_path is not None:
            audio.save_riff(gf.safe_str(output_file_path))

        # get length and data using speect Python API
        waveform = audio.get_audio_waveform()
        audio_sample_rate = int(waveform["samplerate"])
        # NOTE force a true division: under Python 2 (without
        #      "from __future__ import division") int / int truncates,
        #      which would round the duration down to whole seconds
        audio_length = TimeValue(float(audio.num_samples()) / audio_sample_rate)
        audio_format = "pcm16"
        # NOTE numpy.frombuffer replaces the deprecated binary mode
        #      of numpy.fromstring; astype() returns a new, writable array;
        #      dividing by 32768 rescales the int16 values to [-1.0, 1.0)
        audio_samples = numpy.frombuffer(
            waveform["samples"], dtype=numpy.int16).astype("float64") / 32768

        # return data
        return (audio_length, audio_sample_rate, audio_format, audio_samples)
Exemplo n.º 3
0
    def _synthesize_single_python_helper(
        self,
        text,
        voice_code,
        output_file_path=None,
        return_audio_data=True
    ):
        """
        This is a helper function to synthesize a single text fragment via a Python call.

        If ``output_file_path`` is ``None``,
        the audio data will not persist to file at the end of the method.

        If ``return_audio_data`` is false and ``text`` is not empty,
        the returned tuple is ``(True, None)``.

        :param text: the text to be synthesized
        :param voice_code: ignored in this example (see the NOTE below)
        :param output_file_path: if not ``None``, the synthesized audio
                                 is also saved (RIFF) to this path
        :param return_audio_data: if true, read and return the audio data
        :rtype: tuple (result, (duration, sample_rate, encoding, data))
        """
        # return zero if text is the empty string
        if len(text) == 0:
            #
            # NOTE values of sample_rate, encoding, data
            #      do not matter if the duration is 0.000,
            #      so set them to None instead of the more precise:
            #      return (True, (TimeValue("0.000"), 16000, "pcm_s16le", numpy.array([])))
            #
            self.log(u"len(text) is zero: returning 0.000")
            return (True, (TimeValue("0.000"), None, None, None))

        #
        # NOTE in this example, we assume that the Speect voice data files
        #      are located in the same directory of this .py source file
        #      and that the voice JSON file is called "voice.json"
        #
        # NOTE the voice_code value is ignored in this example,
        #      since we have only one TTS voice,
        #      but in general one might select a voice file to load,
        #      depending on voice_code;
        #      in fact, we could have created the ``voice`` object
        #      only once, in the constructor, instead of creating it
        #      each time this function is invoked,
        #      achieving slightly faster synthesis
        #
        voice_json_path = gf.safe_str(gf.absolute_path("voice.json", __file__))
        voice = speect.SVoice(voice_json_path)
        utt = voice.synth(text)
        audio = utt.features["audio"]
        if output_file_path is None:
            self.log(u"output_file_path is None => not saving to file")
        else:
            self.log(u"output_file_path is not None => saving to file...")
            # NOTE apparently, save_riff needs the path to be a byte string
            audio.save_riff(gf.safe_str(output_file_path))
            self.log(u"output_file_path is not None => saving to file... done")

        # return immediately if returning audio data is not needed
        if not return_audio_data:
            self.log(u"return_audio_data is False => return immediately")
            return (True, None)

        # get length and data using speect Python API
        self.log(u"return_audio_data is True => read and return audio data")
        waveform = audio.get_audio_waveform()
        audio_sample_rate = int(waveform["samplerate"])
        # NOTE force a true division: under Python 2 (without
        #      "from __future__ import division") int / int truncates,
        #      which would round the duration down to whole seconds
        audio_length = TimeValue(float(audio.num_samples()) / audio_sample_rate)
        audio_format = "pcm16"
        # NOTE numpy.frombuffer replaces the deprecated binary mode
        #      of numpy.fromstring; astype() returns a new, writable array;
        #      dividing by 32768 rescales the int16 values to [-1.0, 1.0)
        audio_samples = numpy.frombuffer(
            waveform["samples"],
            dtype=numpy.int16
        ).astype("float64") / 32768
        return (True, (
            audio_length,
            audio_sample_rate,
            audio_format,
            audio_samples
        ))
Exemplo n.º 4
0
def load_voice():
    """
    Load the voice defined in "voice.hts.json", attach the Ibibio
    rewrite-rule G2P and syllabification objects to it, replace five of its
    utterance processors with Python callback processors, and return it.

    The replaced processors are, in order: "Tokenize", "Normalize",
    "Phrasify", "LexLookup" and "Pauses".
    """
    import speect
    import speect.uttproc_cb  # utterance callback interface

    # voice definition
    hts_voice = speect.SVoice("voice.hts.json")

    # grapheme-to-phoneme rules, stored in the voice data under "g2p"
    import speect.modules.g2p_rewrites_festival as g2p_rewrites
    import ibibio_g2p
    g2p_rules = g2p_rewrites.G2P_Rewrites_Festival(
        ibibio_g2p.rules, ibibio_g2p.sets)
    hts_voice.data_set("g2p", g2p_rules)

    # syllabification rules, stored in the voice data under "syllabification"
    import speect.modules.syllab_rewrites as syllab_rewrites
    import ibibio_syll
    syllab_rules = syllab_rewrites.Syllab_Rewrites(
        ibibio_syll.rules, ibibio_syll.sets)
    hts_voice.data_set("syllabification", syllab_rules)

    # build the callback utterance processors, remembering for each one
    # the name of the voice definition processor it is going to replace
    as_processor = speect.SUttProcessor.callback
    replacements = []

    # tokenizer
    import speect.modules.tokenize_processor as tokenize_processor
    replacements.append(
        ("Tokenize", as_processor(tokenize_processor.utt_processor)))

    # normalizer
    import speect.modules.normalize_processor as normalize_processor
    replacements.append(
        ("Normalize", as_processor(normalize_processor.utt_processor)))

    # phrasing
    import speect.modules.phrasing_processor as phrasing_processor
    replacements.append(
        ("Phrasify", as_processor(phrasing_processor.utt_processor)))

    # lexical lookup
    import speect.modules.lexical_processor as lexical_processor
    replacements.append(
        ("LexLookup", as_processor(lexical_processor.utt_processor)))

    # pause insertion
    import speect.modules.pause_processor as pause_processor
    replacements.append(
        ("Pauses", as_processor(pause_processor.utt_processor)))

    # replace the voice definition file processors with the ones built above
    for processor_name, processor in replacements:
        hts_voice.uttProcessor_set(processor_name, processor)

    return hts_voice
Exemplo n.º 5
0
import speect
import speect.uttproc_cb


def do_it(utt):
    """
    Create a new "Token" relation in ``utt`` and append two items to it,
    whose "name" feature is set to "first" and "second", in this order.

    :param utt: the utterance where the relation is created
    """
    token_relation = utt.relation_new("Token")
    for token_name in ("first", "second"):
        token = token_relation.append()
        token["name"] = token_name


# Wrap do_it with SUttProcessor.callback, so that it can be set
# as an utterance processor of a voice.
utt_processor = speect.SUttProcessor.callback(do_it)

# NOTE the voice file is a hard-coded absolute path under /home/aby
v = speect.SVoice("/home/aby/Development/testing_voices/eng-ZA/voice.txt")
# Set the callback as the "Tokenize" utterance processor of the voice.
v.uttProcessor_set("Tokenize", utt_processor)

# Synthesize "hello world"; the second argument is presumably the name of
# the utterance type to run -- TODO confirm against the Speect Python API.
utt = v.synth("hello world", "text-to-segments")

print(utt)
Exemplo n.º 6
0
 def loadvoice(self, voicename, voicefilename):
     """
     Load the Speect voice from ``voicefilename`` and store it
     in ``self.voices`` under the key ``voicename``.

     An info message is logged before and after the voice is loaded.
     """
     loading_message = "Loading voice from file '%s'" % (voicefilename)
     log.info(loading_message)
     loaded_voice = speect.SVoice(voicefilename)
     loaded_message = "Voice '%s' loaded." % (voicename)
     log.info(loaded_message)
     self.voices[voicename] = loaded_voice