Пример #1
0
def synthesis(args, text, hp):
    """Decode with E2E-TTS model."""
    set_deterministic_pytorch(args)
    # read training config
    idim = hp.symbol_len
    odim = hp.num_mels
    model = FeedForwardTransformer(idim, odim, hp)
    print(model)

    if os.path.exists(args.path):
        print("\nSynthesis Session...\n")
        model.load_state_dict(torch.load(args.path), strict=False)
    else:
        print("Checkpoint not exixts")
        return None

    model.eval()

    # set torch device
    device = torch.device("cuda" if args.ngpu > 0 else "cpu")
    model = model.to(device)

    input = np.asarray(phonemes_to_sequence(text.split()))
    text = torch.LongTensor(input)
    text = text.cuda()
    # [num_char]

    with torch.no_grad():
        # decode and write
        idx = input[:5]
        start_time = time.time()
        print("text :", text.size())
        outs, probs, att_ws = model.inference(text, hp)
        print("Out size : ", outs.size())

        logging.info("inference speed = %s msec / frame." %
                     ((time.time() - start_time) / (int(outs.size(0)) * 1000)))
        if outs.size(0) == text.size(0) * args.maxlenratio:
            logging.warning("output length reaches maximum length .")

        print("mels", outs.size())
        mel = outs.cpu().numpy()  # [T_out, num_mel]
        print("numpy ", mel.shape)

        return mel
Пример #2
0
def synthesis_tts(args, text, path):
    """Decode with E2E-TTS model."""
    set_deterministic_pytorch(args)
    print("TTS synthesis")
    # read training config
    idim = hp.symbol_len
    odim = hp.num_mels
    print("Text :", text)
    input = np.asarray(phonemes_to_sequence(text.split()))
    print("Input :", input)
    model = FeedForwardTransformer(idim, odim)

    if os.path.exists(path):
        logging.info('\nSynthesis Session...\n')
        model.load_state_dict(torch.load(path), strict=False)
    else:
        logging.info("Checkpoint not exixts")
        return None

    model.eval()

    # set torch device
    device = torch.device("cuda" if args.ngpu > 0 else "cpu")
    model = model.to(device)

    text = torch.LongTensor(input)
    text = text.cuda()
    #[num_char]

        # define function for plot prob and att_ws
    def _plot_and_save(array, figname, figsize=(6, 4), dpi=150):
        import matplotlib.pyplot as plt
        shape = array.shape
        if len(shape) == 1:
            # for eos probability
            plt.figure(figsize=figsize, dpi=dpi)
            plt.plot(array)
            plt.xlabel("Frame")
            plt.ylabel("Probability")
            plt.ylim([0, 1])
        elif len(shape) == 2:
            # for tacotron 2 attention weights, whose shape is (out_length, in_length)
            plt.figure(figsize=figsize, dpi=dpi)
            plt.imshow(array, aspect="auto")
            plt.xlabel("Input")
            plt.ylabel("Output")
        elif len(shape) == 4:
            # for transformer attention weights, whose shape is (#leyers, #heads, out_length, in_length)
            plt.figure(figsize=(figsize[0] * shape[0], figsize[1] * shape[1]), dpi=dpi)
            for idx1, xs in enumerate(array):
                for idx2, x in enumerate(xs, 1):
                    plt.subplot(shape[0], shape[1], idx1 * shape[1] + idx2)
                    plt.imshow(x, aspect="auto")
                    plt.xlabel("Input")
                    plt.ylabel("Output")
        else:
            raise NotImplementedError("Support only from 1D to 4D array.")
        plt.tight_layout()
        if not os.path.exists(os.path.dirname(figname)):
            # NOTE: exist_ok = True is needed for parallel process decoding
            os.makedirs(os.path.dirname(figname), exist_ok=True)
        plt.savefig(figname)
        plt.close()

    with torch.no_grad():
        # decode and write
        idx = input[:5]
        start_time = time.time()
        print("predicting")
        outs, probs, att_ws = model.inference(text, args)

        logging.info("inference speed = %s msec / frame." % (
            (time.time() - start_time) / (int(outs.size(0)) * 1000)))
        if outs.size(0) == text.size(0) * 5:
            logging.warning("output length reaches maximum length .")

        mel = outs#.cpu().numpy() # [T_out, num_mel]

        

        return mel