Exemplo n.º 1
0
def create_NMs(tacotron2_params, logger=None, decoder_infer=False):
    """Instantiate the neural modules that make up a Tacotron 2 pipeline.

    Args:
        tacotron2_params: Mapping holding a "labels" entry plus one section
            of keyword arguments per module ("AudioPreprocessing",
            "TextEmbedding", "Tacotron2Encoder", "Tacotron2Decoder" and
            "Tacotron2Postnet").
        logger: Optional logger. When truthy, the combined ``num_weights``
            of the embedding, encoder, decoder and postnet is reported
            through ``logger.info``.
        decoder_infer: When true, ``Tacotron2DecoderInfer`` is built instead
            of ``Tacotron2Decoder``; both are configured from the
            "Tacotron2Decoder" section.

    Returns:
        The tuple ``(data_preprocessor, text_embedding, t2_enc, t2_dec,
        t2_postnet, t2_loss, makegatetarget)``.
    """
    data_preprocessor = nemo_asr.AudioPreprocessing(
        **tacotron2_params["AudioPreprocessing"])

    # The embedding table has one row per label.
    num_labels = len(tacotron2_params["labels"])
    text_embedding = nemo_tts.TextEmbedding(
        num_labels, **tacotron2_params["TextEmbedding"])

    t2_enc = nemo_tts.Tacotron2Encoder(**tacotron2_params["Tacotron2Encoder"])

    # Training and inference decoders share the same configuration section.
    decoder_cls = (nemo_tts.Tacotron2DecoderInfer if decoder_infer
                   else nemo_tts.Tacotron2Decoder)
    t2_dec = decoder_cls(**tacotron2_params["Tacotron2Decoder"])

    t2_postnet = nemo_tts.Tacotron2Postnet(
        **tacotron2_params["Tacotron2Postnet"])
    t2_loss = nemo_tts.Tacotron2Loss()
    makegatetarget = nemo_tts.MakeGate()

    if logger:
        # Only these four modules are included in the reported total.
        weighted_modules = (text_embedding, t2_enc, t2_dec, t2_postnet)
        total_weights = sum(module.num_weights for module in weighted_modules)

        banner = '================================'
        logger.info(banner)
        logger.info(f"Total number of parameters: {total_weights}")
        logger.info(banner)

    return (data_preprocessor, text_embedding, t2_enc, t2_dec, t2_postnet,
            t2_loss, makegatetarget)
Exemplo n.º 2
0
def create_NMs(tacotron2_params, decoder_infer=False):
    """Instantiate the neural modules that make up a Tacotron 2 pipeline.

    The combined ``num_weights`` of the embedding, encoder, decoder and
    postnet is always reported through ``nemo.logging.info``.

    Args:
        tacotron2_params: Mapping holding a "labels" entry plus one section
            of keyword arguments per module
            ("AudioToMelSpectrogramPreprocessor", "TextEmbedding",
            "Tacotron2Encoder", "Tacotron2Decoder", "Tacotron2Postnet" and
            "Tacotron2Loss").
        decoder_infer: When true, ``Tacotron2DecoderInfer`` is built instead
            of ``Tacotron2Decoder``; both are configured from the
            "Tacotron2Decoder" section.

    Returns:
        The tuple ``(data_preprocessor, text_embedding, t2_enc, t2_dec,
        t2_postnet, t2_loss, makegatetarget)``.
    """
    data_preprocessor = nemo_asr.AudioToMelSpectrogramPreprocessor(
        **tacotron2_params["AudioToMelSpectrogramPreprocessor"])

    vocab_size = len(tacotron2_params["labels"]) + 3  # + 3 special chars
    text_embedding = nemo_tts.TextEmbedding(
        vocab_size, **tacotron2_params["TextEmbedding"])

    t2_enc = nemo_tts.Tacotron2Encoder(**tacotron2_params["Tacotron2Encoder"])

    # Training and inference decoders share the same configuration section.
    decoder_cls = (nemo_tts.Tacotron2DecoderInfer if decoder_infer
                   else nemo_tts.Tacotron2Decoder)
    t2_dec = decoder_cls(**tacotron2_params["Tacotron2Decoder"])

    t2_postnet = nemo_tts.Tacotron2Postnet(
        **tacotron2_params["Tacotron2Postnet"])
    t2_loss = nemo_tts.Tacotron2Loss(**tacotron2_params["Tacotron2Loss"])
    makegatetarget = nemo_tts.MakeGate()

    # Only these four modules are included in the reported total.
    weighted_modules = (text_embedding, t2_enc, t2_dec, t2_postnet)
    total_weights = sum(module.num_weights for module in weighted_modules)

    banner = '================================'
    nemo.logging.info(banner)
    nemo.logging.info(f"Total number of parameters: {total_weights}")
    nemo.logging.info(banner)

    return (data_preprocessor, text_embedding, t2_enc, t2_dec, t2_postnet,
            t2_loss, makegatetarget)
Exemplo n.º 3
0
    def __init__(self):
        """Load a pre-trained Tacotron 2 text-to-speech pipeline on CPU.

        Reads the "tts" section of ``conf.yaml`` and ``tacotron2.yaml`` from
        the configured model directory, then builds the text embedding,
        encoder, inference decoder and postnet, restoring each from its
        ``.pt`` checkpoint and logging its parameter count. When the
        configured vocoder is "waveglow", the WaveGlow module is also loaded
        from the vocoder directory, and its denoiser is set up if
        "denoising_strength" is positive.
        """
        self.nf = nemo.core.NeuralModuleFactory(
            placement=nemo.core.DeviceType.CPU)
        log_info = self.nf.logger.info

        # Create text to spectrogram model
        self.tts_conf = parse_yaml("conf.yaml")["tts"]
        model_dir = self.tts_conf["model_dir"]
        self.tacotron2_params = parse_yaml(
            os.path.join(model_dir, "tacotron2.yaml"))
        params = self.tacotron2_params

        def checkpoint(filename):
            # All Tacotron 2 checkpoints live next to tacotron2.yaml.
            return os.path.join(model_dir, filename)

        log_info('================================')

        # Text embedding
        vocab_size = len(params["labels"]) + 3  # + 3 special chars
        self.text_embedding = nemo_tts.TextEmbedding(
            vocab_size, **params["TextEmbedding"])
        self.text_embedding.restore_from(checkpoint("TextEmbedding.pt"))
        log_info(f"Number of parameters in text-embedding: "
                 f"{self.text_embedding.num_weights}")

        # Encoder
        self.t2_enc = nemo_tts.Tacotron2Encoder(**params["Tacotron2Encoder"])
        self.t2_enc.restore_from(checkpoint("Tacotron2Encoder.pt"))
        log_info(
            f"Number of parameters in encoder: {self.t2_enc.num_weights}")

        # Decoder (inference variant, configured and restored from the
        # "Tacotron2Decoder" section / checkpoint)
        self.t2_dec = nemo_tts.Tacotron2DecoderInfer(
            **params["Tacotron2Decoder"])
        self.t2_dec.restore_from(checkpoint("Tacotron2Decoder.pt"))
        log_info(
            f"Number of parameters in decoder: {self.t2_dec.num_weights}")

        # PostNet
        self.t2_postnet = nemo_tts.Tacotron2Postnet(
            **params["Tacotron2Postnet"])
        self.t2_postnet.restore_from(checkpoint("Tacotron2Postnet.pt"))
        log_info(
            f"Number of parameters in postnet: {self.t2_postnet.num_weights}")

        total_weights = sum(
            module.num_weights
            for module in (self.text_embedding, self.t2_enc,
                           self.t2_dec, self.t2_postnet))
        log_info(f"Total number of parameters in model: "
                 f"{total_weights}")

        # Load waveglow if chosen
        if self.tts_conf["vocoder"] == "waveglow":
            log_info("Loading waveglow as a vocoder")
            vocoder_dir = self.tts_conf["vocoder_dir"]
            self.waveglow_params = parse_yaml(
                os.path.join(vocoder_dir, "waveglow.yaml"))
            self.waveglow = nemo_tts.WaveGlowInferNM(
                sigma=self.tts_conf["sigma"],
                **self.waveglow_params["WaveGlowNM"])
            self.waveglow.restore_from(
                os.path.join(vocoder_dir, "WaveGlowNM.pt"))
            if self.tts_conf["denoising_strength"] > 0:
                log_info("Setting up a denoiser for waveglow")
                self.waveglow.setup_denoiser()
                log_info("Waveglow denoiser is ready")
        log_info('================================')