Example No. 1
def infer_waveglow_trt(waveglow, waveglow_context, mel, measurements, fp16):

    mel_size = mel.size(2)
    batch_size = mel.size(0)
    stride = 256
    n_group = 8
    z_size = mel_size*stride
    z_size = z_size//n_group
    z = torch.randn(batch_size, n_group, z_size).cuda()
    audios = torch.zeros(batch_size, mel_size*stride).cuda()

    if fp16:
        z = z.half()
        mel = mel.half()
        audios = audios.half()

    waveglow_tensors = {
        "inputs" :
        {'mel': mel, 'z': z},
        "outputs" :
        {'audio': audios}
    }
    print("Running WaveGlow")
    with MeasureTime(measurements, "waveglow_time"):
        run_trt_engine(waveglow_context, waveglow, waveglow_tensors)

    return audios
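
All of the TensorRT examples here delegate to a run_trt_engine helper whose source is not shown. A minimal sketch, assuming TensorRT's legacy name-based binding API (not the repository's exact code):

def run_trt_engine(context, engine, tensors):
    # Sketch (assumption): resolve each named torch tensor to its binding
    # slot, collect the device pointers, and run the engine synchronously.
    bindings = [None] * engine.num_bindings
    for name, tensor in {**tensors['inputs'], **tensors['outputs']}.items():
        idx = engine.get_binding_index(name)
        bindings[idx] = tensor.data_ptr()  # CUDA pointer of the torch tensor
    context.execute_v2(bindings)

Example No. 2 below passes a flat dict instead of the nested "inputs"/"outputs" form, so its variant of the helper would merge all names into the same binding table.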
Example No. 2
def infer_waveglow_trt(waveglow, waveglow_context, mel, measurements, fp16):

    mel = mel.unsqueeze(3)
    mel_size = mel.size(2)
    batch_size = mel.size(0)
    stride = 256
    kernel_size = 1024
    n_group = 8
    z_size = (mel_size-1)*stride+(kernel_size-1)+1
    z_size = z_size - (kernel_size-stride)
    z_size = z_size//n_group
    z = torch.randn(batch_size, n_group, z_size, 1).cuda()
    audios = torch.zeros(batch_size, mel_size*stride).cuda()

    if fp16:
        z = z.half()
        mel = mel.half()
        audios = audios.half()

    waveglow_tensors = {
        # inputs
        'mel': mel, 'z': z,
        # outputs
        'audio': audios
    }
    print("Running WaveGlow")
    with MeasureTime(measurements, "waveglow_time"):
        run_trt_engine(waveglow_context, waveglow, waveglow_tensors)

    return audios
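
The z_size algebra above only looks different from Example No. 1: the transposed-convolution output length (mel_size - 1)*stride + (kernel_size - 1) + 1, minus the (kernel_size - stride) overhang, collapses to mel_size*stride. A quick check with this example's constants:

# Sanity check of the length algebra (mel_size is arbitrary).
mel_size, stride, kernel_size = 620, 256, 1024
full = (mel_size - 1) * stride + (kernel_size - 1) + 1  # transposed-conv output
trimmed = full - (kernel_size - stride)                 # drop the overhang
assert trimmed == mel_size * stride                     # equals Example No. 1's z_size before //n_group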
Example No. 3
def infer_waveglow_onnx(waveglow_path, mel, measurements, fp16):
    import onnx
    import onnxruntime
    sess = onnxruntime.InferenceSession(waveglow_path)

    device = mel.device
    mel_size = mel.size(2)
    batch_size = mel.size(0)
    stride = 256
    n_group = 8
    z_size = mel_size * stride
    z_size = z_size // n_group
    z = torch.randn(batch_size, n_group, z_size).cuda()

    mel = mel.unsqueeze(3)
    z = z.unsqueeze(3)

    if fp16:
        z = z.half()
        mel = mel.half()

    mel = mel.cpu().numpy().copy()
    z = z.cpu().numpy().copy()

    print("Running WaveGlow with ONNX Runtime")
    with MeasureTime(measurements, "waveglow_time"):
        result = sess.run(["audio"], {'mel': mel, 'z': z})
    audios = torch.tensor(result[0], device=device)
    return audios
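
A portability note: newer ONNX Runtime builds expect execution providers to be named explicitly when the session is created. To keep this benchmark on the GPU one would build the session roughly as follows (an environment assumption, not part of the original script):

import onnxruntime

# Prefer CUDA, fall back to CPU if the CUDA provider is unavailable.
sess = onnxruntime.InferenceSession(
    waveglow_path,
    providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])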
Example No. 4
def main():
    """
    Launches inference benchmark.
    Inference is executed on a single GPU.
    """
    parser = argparse.ArgumentParser(
        description='PyTorch FastPitch Inference Benchmark')
    parser = parse_args(parser)
    args, _ = parser.parse_known_args()

    DLLogger.init(backends=[
        JSONStreamBackend(Verbosity.DEFAULT, args.log_file),
        StdOutBackend(Verbosity.VERBOSE)
    ])
    for k, v in vars(args).items():
        DLLogger.log(step="PARAMETER", data={k: v})
    DLLogger.log(step="PARAMETER", data={'model_name': 'FastPitch_PyT'})

    model = load_and_setup_model('FastPitch',
                                 parser,
                                 None,
                                 args.amp_run,
                                 'cuda',
                                 unk_args=[],
                                 forward_is_infer=True,
                                 ema=False,
                                 jitable=True)

    # FIXME Temporarily disabled due to nn.LayerNorm fp16 casting bug in pytorch:20.02-py3 and 20.03
    # model = torch.jit.script(model)

    warmup_iters = 3
    iters = 1
    gen_measures = MeasureTime()
    all_frames = 0
    for i in range(-warmup_iters, iters):
        text_padded = torch.randint(low=0,
                                    high=148,
                                    size=(args.batch_size, 128),
                                    dtype=torch.long).to('cuda')
        input_lengths = torch.IntTensor([text_padded.size(1)] *
                                        args.batch_size).to('cuda')
        durs = torch.ones_like(text_padded).mul_(4).to('cuda')

        with torch.no_grad(), gen_measures:
            mels, *_ = model(text_padded, input_lengths, dur_tgt=durs)
        num_frames = mels.size(0) * mels.size(2)

        if i >= 0:
            all_frames += num_frames
            DLLogger.log(step=(i, ), data={"latency": gen_measures[-1]})
            DLLogger.log(step=(i, ),
                         data={"frames/s": num_frames / gen_measures[-1]})

    measures = gen_measures[warmup_iters:]
    DLLogger.log(step=(), data={'avg latency': np.mean(measures)})
    DLLogger.log(step=(), data={'avg frames/s': all_frames / np.sum(measures)})
    DLLogger.flush()
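
gen_measures is used both as a context manager and as an indexable history (gen_measures[-1], gen_measures[warmup_iters:]), which suggests MeasureTime is a list subclass in this variant. A minimal sketch under that assumption:

import time
import torch

class MeasureTime(list):
    # Sketch (assumption): append each timed interval, synchronizing CUDA
    # so queued GPU work is included in the measurement.
    def __enter__(self):
        torch.cuda.synchronize()
        self.t0 = time.perf_counter()

    def __exit__(self, exc_type, exc_value, exc_traceback):
        torch.cuda.synchronize()
        self.append(time.perf_counter() - self.t0)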
Example No. 5
def main():

    parser = argparse.ArgumentParser(
        description='TensorRT Tacotron 2 Inference')
    parser = parse_args(parser)
    args, _ = parser.parse_known_args()

    # initialize CUDA state
    torch.cuda.init()

    TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
    encoder = load_engine(args.encoder, TRT_LOGGER)
    decoder_iter = load_engine(args.decoder, TRT_LOGGER)
    postnet = load_engine(args.postnet, TRT_LOGGER)
    waveglow = load_engine(args.waveglow, TRT_LOGGER)

    if args.waveglow_ckpt != "":
        # setup denoiser using WaveGlow PyTorch checkpoint
        waveglow_ckpt = load_and_setup_model('WaveGlow', parser, args.waveglow_ckpt,
                                             True, forward_is_infer=True)
        denoiser = Denoiser(waveglow_ckpt).cuda()
        # after initialization, we don't need WaveGlow PyTorch checkpoint
        # anymore - deleting
        del waveglow_ckpt
        torch.cuda.empty_cache()

    # create TRT contexts for each engine
    encoder_context = encoder.create_execution_context()
    decoder_context = decoder_iter.create_execution_context()
    postnet_context = postnet.create_execution_context()
    waveglow_context = waveglow.create_execution_context()

    DLLogger.init(backends=[JSONStreamBackend(Verbosity.DEFAULT,
                                              args.output+'/'+args.log_file),
                            StdOutBackend(Verbosity.VERBOSE)])

    try:
        with open(args.input, 'r') as f:
            texts = f.readlines()
    except OSError:
        print(f"Could not read file {args.input}")
        sys.exit(1)

    measurements = {}

    sequences, sequence_lengths = prepare_input_sequence(texts)
    sequences = sequences.to(torch.int32)
    sequence_lengths = sequence_lengths.to(torch.int32)
    with MeasureTime(measurements, "latency"):
        mel, mel_lengths = infer_tacotron2_trt(encoder, decoder_iter, postnet,
                                               encoder_context, decoder_context, postnet_context,
                                               sequences, sequence_lengths, measurements, args.fp16)
        audios = infer_waveglow_trt(waveglow, waveglow_context, mel, measurements, args.fp16)

    # exiting this with-block releases the TensorRT execution contexts
    with encoder_context, decoder_context, postnet_context, waveglow_context:
        pass

    audios = audios.float()
    if args.waveglow_ckpt != "":
        with MeasureTime(measurements, "denoiser"):
            audios = denoiser(audios, strength=args.denoising_strength).squeeze(1)

    for i, audio in enumerate(audios):
        audio = audio[:mel_lengths[i]*args.stft_hop_length]
        audio = audio/torch.max(torch.abs(audio))
        audio_path = args.output + "/audio_" + str(i) + "_trt.wav"
        write(audio_path, args.sampling_rate, audio.cpu().numpy())


    DLLogger.log(step=0, data={"tacotron2_encoder_latency": measurements['tacotron2_encoder_time']})
    DLLogger.log(step=0, data={"tacotron2_decoder_latency": measurements['tacotron2_decoder_time']})
    DLLogger.log(step=0, data={"tacotron2_postnet_latency": measurements['tacotron2_postnet_time']})
    DLLogger.log(step=0, data={"waveglow_latency": measurements['waveglow_time']})
    DLLogger.log(step=0, data={"latency": measurements['latency']})

    if args.waveglow_ckpt != "":
        DLLogger.log(step=0, data={"denoiser": measurements['denoiser']})
    DLLogger.flush()

    prec = "fp16" if args.fp16 else "fp32"
    latency = measurements['latency']
    throughput = audios.size(1)/latency
    log_data = f"1,{sequence_lengths[0].item()},{prec},{latency},{throughput},{mel_lengths[0].item()}\n"
    with open("log_bs1_"+prec+".log", 'a') as f:
        f.write(log_data)
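
load_engine is another helper whose source is not shown; deserializing a prebuilt TensorRT plan file is standard, so a sketch could be:

import tensorrt as trt

def load_engine(engine_filepath, trt_logger):
    # Sketch (assumption): read the serialized engine and deserialize it
    # with a TensorRT runtime built around the given logger.
    with open(engine_filepath, 'rb') as f, trt.Runtime(trt_logger) as runtime:
        engine = runtime.deserialize_cuda_engine(f.read())
    return engine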
Example No. 6
def infer_tacotron2_trt(encoder, decoder_iter, postnet,
                        encoder_context, decoder_context, postnet_context,
                        sequences, sequence_lengths, measurements, fp16):

    memory = torch.zeros((len(sequence_lengths), sequence_lengths[0], 512)).cuda()
    if fp16:
        memory = memory.half()
    device = memory.device
    dtype = memory.dtype

    processed_memory = torch.zeros((len(sequence_lengths), sequence_lengths[0], 128), device=device, dtype=dtype)
    lens = torch.zeros_like(sequence_lengths)

    encoder_tensors = {
        # inputs
        'sequences': sequences, 'sequence_lengths': sequence_lengths,
        # outputs
        'memory': memory, 'lens': lens, 'processed_memory': processed_memory
    }

    print("Running Tacotron2 Encoder")
    with MeasureTime(measurements, "tacotron2_encoder_time"):
        run_trt_engine(encoder_context, encoder, encoder_tensors)

    device = memory.device
    mel_lengths = torch.zeros([memory.size(0)], dtype=torch.int32, device=device)
    not_finished = torch.ones([memory.size(0)], dtype=torch.int32, device=device)
    mel_outputs, gate_outputs, alignments = (torch.zeros(1, device=device),
                                             torch.zeros(1, device=device),
                                             torch.zeros(1, device=device))
    gate_threshold = 0.5
    max_decoder_steps = 1664
    first_iter = True

    decoder_inputs = init_decoder_inputs(memory, processed_memory, sequence_lengths)
    decoder_outputs = init_decoder_outputs(memory, sequence_lengths)

    print("Running Tacotron2 Decoder")
    while True:
        decoder_tensors = init_decoder_tensors(decoder_inputs, decoder_outputs)
        with MeasureTime(measurements, "step"):
            run_trt_engine(decoder_context, decoder_iter, decoder_tensors)

        if first_iter:
            mel_outputs = torch.unsqueeze(decoder_outputs[7], 2)
            gate_outputs = torch.unsqueeze(decoder_outputs[8], 2)
            alignments = torch.unsqueeze(decoder_outputs[4], 2)
            measurements['tacotron2_decoder_time'] = measurements['step']
            first_iter = False
        else:
            mel_outputs = torch.cat((mel_outputs, torch.unsqueeze(decoder_outputs[7], 2)), 2)
            gate_outputs = torch.cat((gate_outputs, torch.unsqueeze(decoder_outputs[8], 2)), 2)
            alignments = torch.cat((alignments, torch.unsqueeze(decoder_outputs[4], 2)), 2)
            measurements['tacotron2_decoder_time'] += measurements['step']

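        # Gate logic: dec is 1 while sigmoid(gate) <= threshold; once a
        # sequence fires its stop gate, not_finished latches to 0 and its
        # mel_length stops growing.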
        dec = torch.le(torch.sigmoid(decoder_outputs[8]), gate_threshold).to(torch.int32).squeeze(1)
        not_finished = not_finished*dec
        mel_lengths += not_finished

        if torch.sum(not_finished) == 0:
            print("Stopping after",mel_outputs.size(2),"decoder steps")
            break
        if mel_outputs.size(2) == max_decoder_steps:
            print("Warning! Reached max decoder steps")
            break

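        # Assumed behavior of the helper: this step's outputs are rewired to
        # be the next step's inputs, so the preallocated buffers ping-pong
        # between the two roles instead of being reallocated.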
        decoder_inputs, decoder_outputs = swap_inputs_outputs(decoder_inputs, decoder_outputs)

    mel_outputs_postnet = torch.zeros_like(mel_outputs, device=device, dtype=dtype)

    postnet_tensors = {
        # inputs
        'mel_outputs': mel_outputs,
        # outputs
        'mel_outputs_postnet': mel_outputs_postnet
    }
    print("Running Tacotron2 Postnet")
    with MeasureTime(measurements, "tacotron2_postnet_time"):
        run_trt_engine(postnet_context, postnet, postnet_tensors)

    print("Tacotron2 Postnet done")

    return mel_outputs_postnet, mel_lengths
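
The measurements['step'] accumulation above relies on MeasureTime writing the elapsed time into the supplied dict under the supplied key. A minimal sketch of that variant (the cpu_run flag mirrors the usage in Example No. 7; this is an assumed reconstruction, not the repository's exact code):

import time
import torch

class MeasureTime:
    def __init__(self, measurements, key, cpu_run=False):
        self.measurements = measurements
        self.key = key
        self.cpu_run = cpu_run

    def __enter__(self):
        if not self.cpu_run:
            torch.cuda.synchronize()  # make queued GPU work visible to the timer
        self.t0 = time.perf_counter()

    def __exit__(self, exc_type, exc_value, exc_traceback):
        if not self.cpu_run:
            torch.cuda.synchronize()
        self.measurements[self.key] = time.perf_counter() - self.t0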
Example No. 7
def main():
    """
    Launches text to speech (inference).
    Inference is executed on a single GPU or CPU.
    """
    parser = argparse.ArgumentParser(
        description='PyTorch Tacotron 2 Inference')
    parser = parse_args(parser)
    args, unknown_args = parser.parse_known_args()

    DLLogger.init(backends=[JSONStreamBackend(Verbosity.DEFAULT, args.log_file),
                            StdOutBackend(Verbosity.VERBOSE)])
    for k, v in vars(args).items():
        DLLogger.log(step="PARAMETER", data={k: v})
    DLLogger.log(step="PARAMETER", data={'model_name': 'Tacotron2_PyT'})

    measurements_all = {"pre_processing": [],
                        "tacotron2_latency": [],
                        "waveglow_latency": [],
                        "latency": [],
                        "type_conversion": [],
                        "data_transfer": [],
                        "storage": [],
                        "tacotron2_items_per_sec": [],
                        "waveglow_items_per_sec": [],
                        "num_mels_per_audio": [],
                        "throughput": []}

    print("args:", args, unknown_args)

    tacotron2 = load_and_setup_model('Tacotron2', parser, args.tacotron2, args.amp_run, args.cpu_run, forward_is_infer=True)
    waveglow = load_and_setup_model('WaveGlow', parser, args.waveglow, args.amp_run, args.cpu_run)

    if args.cpu_run:
        denoiser = Denoiser(waveglow, args.cpu_run)
    else:
        denoiser = Denoiser(waveglow, args.cpu_run).cuda()

    jitted_tacotron2 = torch.jit.script(tacotron2)

    texts = ["The forms of printed letters should be beautiful, and that their arrangement on the page should be reasonable and a help to the shapeliness of the letters themselves. The forms of printed letters should be beautiful, and that their arrangement on the page should be reasonable and a help to the shapeliness of the letters themselves."]
    texts = [texts[0][:args.input_length]]
    texts = texts*args.batch_size

    warmup_iters = 3

    for iter in range(args.num_iters):

        measurements = {}

        with MeasureTime(measurements, "pre_processing", args.cpu_run):
            sequences_padded, input_lengths = prepare_input_sequence(texts, args.cpu_run)

        with torch.no_grad():
            with MeasureTime(measurements, "latency", args.cpu_run):
                with MeasureTime(measurements, "tacotron2_latency", args.cpu_run):
                    mel, mel_lengths, _ = jitted_tacotron2(sequences_padded, input_lengths)

                with MeasureTime(measurements, "waveglow_latency", args.cpu_run):
                    audios = waveglow.infer(mel, sigma=args.sigma_infer)
                    audios = audios.float()
                    audios = denoiser(audios, strength=args.denoising_strength).squeeze(1)

        num_mels = mel.size(0)*mel.size(2)
        num_samples = audios.size(0)*audios.size(1)

        with MeasureTime(measurements, "type_conversion", args.cpu_run):
            audios = audios.float()

        with MeasureTime(measurements, "data_transfer", args.cpu_run):
            audios = audios.cpu()

        with MeasureTime(measurements, "storage", args.cpu_run):
            audios = audios.numpy()
            for i, audio in enumerate(audios):
                audio_path = "audio_"+str(i)+".wav"
                write(audio_path, args.sampling_rate,
                      audio[:mel_lengths[i]*args.stft_hop_length])

        measurements['tacotron2_items_per_sec'] = num_mels/measurements['tacotron2_latency']
        measurements['waveglow_items_per_sec'] = num_samples/measurements['waveglow_latency']
        measurements['num_mels_per_audio'] = mel.size(2)
        measurements['throughput'] = num_samples/measurements['latency']

        if iter >= warmup_iters:
            for k, v in measurements.items():
                measurements_all[k].append(v)
                DLLogger.log(step=(iter-warmup_iters), data={k: v})

    DLLogger.flush()

    print_stats(measurements_all)
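
prepare_input_sequence is assumed to tokenize, length-sort, and pad the batch. A sketch assuming the Tacotron 2 reference text frontend (text_to_sequence with english_cleaners) and the descending-length ordering the encoder's masking expects:

import torch
from text import text_to_sequence  # Tacotron 2 reference frontend (assumption)

def prepare_input_sequence(texts, cpu_run=False):
    seqs = [torch.tensor(text_to_sequence(t, ['english_cleaners']),
                         dtype=torch.long) for t in texts]
    lengths = torch.tensor([len(s) for s in seqs], dtype=torch.long)
    lengths, order = torch.sort(lengths, descending=True)  # longest first
    padded = torch.nn.utils.rnn.pad_sequence([seqs[i] for i in order.tolist()],
                                             batch_first=True)
    if not cpu_run:
        padded, lengths = padded.cuda(), lengths.cuda()
    return padded, lengths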
Example No. 8
def main():
    """
    Launches inference benchmark.
    Inference is executed on a single GPU.
    """
    parser = argparse.ArgumentParser(
        description='PyTorch Tacotron 2 Inference')
    parser = parse_args(parser)
    args, _ = parser.parse_known_args()

    log_file = os.path.join(args.output, args.log_file)

    DLLogger.init(backends=[
        JSONStreamBackend(Verbosity.DEFAULT, log_file),
        StdOutBackend(Verbosity.VERBOSE)
    ])
    for k, v in vars(args).items():
        DLLogger.log(step="PARAMETER", data={k: v})
    DLLogger.log(step="PARAMETER", data={'model_name': 'Tacotron2_PyT'})

    model = load_and_setup_model(args.model_name,
                                 parser,
                                 None,
                                 args.amp_run,
                                 forward_is_infer=True)

    if args.model_name == "Tacotron2":
        model = torch.jit.script(model)

    warmup_iters = 3
    num_iters = 1 + warmup_iters

    for i in range(num_iters):

        measurements = {}

        if args.model_name == 'Tacotron2':
            text_padded = torch.randint(low=0,
                                        high=148,
                                        size=(args.batch_size, 140),
                                        dtype=torch.long).cuda()
            input_lengths = torch.IntTensor([text_padded.size(1)] *
                                            args.batch_size).cuda().long()
            with torch.no_grad(), MeasureTime(measurements, "inference_time"):
                mels, _, _ = model(text_padded, input_lengths)
            num_items = mels.size(0) * mels.size(2)

        if args.model_name == 'WaveGlow':
            n_mel_channels = model.upsample.in_channels
            num_mels = 895
            mel_padded = torch.zeros(args.batch_size, n_mel_channels,
                                     num_mels).normal_(-5.62, 1.98).cuda()
            if args.amp_run:
                mel_padded = mel_padded.half()

            with torch.no_grad(), MeasureTime(measurements, "inference_time"):
                audios = model(mel_padded)
                audios = audios.float()
            num_items = audios.size(0) * audios.size(1)

        if i >= warmup_iters:
            DLLogger.log(step=(i - warmup_iters, ),
                         data={"latency": measurements['inference_time']})
            DLLogger.log(step=(i - warmup_iters, ),
                         data={
                             "items_per_sec":
                             num_items / measurements['inference_time']
                         })

    DLLogger.log(step=tuple(),
                 data={'infer_latency': measurements['inference_time']})
    DLLogger.log(step=tuple(),
                 data={
                     'infer_items_per_sec':
                     num_items / measurements['inference_time']
                 })

    DLLogger.flush()
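
Every example funnels model construction through load_and_setup_model. The call sites imply that it builds the requested model, optionally restores a checkpoint, moves it to the target device in eval mode, halves the weights for fp16 runs, and (with forward_is_infer=True) exposes infer() as forward(). A sketch under those assumptions, with models.get_model standing in for the repository's model factory:

import torch
import models  # repository module (assumption)

def load_and_setup_model(model_name, parser, checkpoint, fp16_run, cpu_run=False,
                         forward_is_infer=False, **kwargs):
    # Sketch: `parser` carries model-specific arguments in the real helper;
    # **kwargs absorbs the extra flags seen in Example No. 4 (ema, jitable, ...).
    device = torch.device('cpu' if cpu_run else 'cuda')
    model = models.get_model(model_name, cpu_run=cpu_run,
                             forward_is_infer=forward_is_infer)
    if checkpoint is not None:
        state_dict = torch.load(checkpoint, map_location='cpu')['state_dict']
        model.load_state_dict(state_dict)
    model = model.to(device).eval()
    if fp16_run:
        model.half()
    return model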
Example No. 9
def infer_tacotron2_trt(encoder, decoder_iter, postnet, encoder_context,
                        decoder_context, postnet_context, sequences,
                        sequence_lengths, measurements, fp16, loop):

    batch_size = len(sequence_lengths)
    max_sequence_len = sequence_lengths[0]
    memory = torch.zeros((batch_size, max_sequence_len, 512)).cuda()
    if fp16:
        memory = memory.half()
    device = memory.device
    dtype = memory.dtype

    processed_memory = torch.zeros((batch_size, max_sequence_len, 128),
                                   device=device,
                                   dtype=dtype)
    lens = torch.zeros_like(sequence_lengths)
    print(f"batch_size: {batch_size}, max sequence length: {max_sequence_len}")

    encoder_tensors = {
        "inputs": {
            'sequences': sequences,
            'sequence_lengths': sequence_lengths
        },
        "outputs": {
            'memory': memory,
            'lens': lens,
            'processed_memory': processed_memory
        }
    }

    print("Running Tacotron2 Encoder")
    with MeasureTime(measurements, "tacotron2_encoder_time"):
        run_trt_engine(encoder_context, encoder, encoder_tensors)
    max_decoder_steps = 1024
    device = memory.device
    mel_lengths = torch.zeros([memory.size(0)],
                              dtype=torch.int32,
                              device=device)
    not_finished = torch.ones([memory.size(0)],
                              dtype=torch.int32,
                              device=device)
    mel_outputs = torch.ones((batch_size, 80, max_decoder_steps),
                             device=device, dtype=dtype)
    gate_threshold = 0.5
    first_iter = True

    decoder_inputs = init_decoder_inputs(memory, processed_memory,
                                         sequence_lengths)
    decoder_outputs = init_decoder_outputs(memory, sequence_lengths)

    if loop:
        if decoder_context is None:
            print("Running Tacotron2 Decoder with loop with ONNX-RT")
            decoder_inputs_onnxrt = [
                x.cpu().numpy().copy() for x in decoder_inputs
            ]
            import onnx
            import onnxruntime
            sess = onnxruntime.InferenceSession(decoder_iter)

            with MeasureTime(measurements, "tacotron2_decoder_time"):
                result = sess.run(
                    ["mel_outputs", "mel_lengths_t"], {
                        'decoder_input_0': decoder_inputs_onnxrt[0],
                        'attention_hidden_0': decoder_inputs_onnxrt[1],
                        'attention_cell_0': decoder_inputs_onnxrt[2],
                        'decoder_hidden_0': decoder_inputs_onnxrt[3],
                        'decoder_cell_0': decoder_inputs_onnxrt[4],
                        'attention_weights_0': decoder_inputs_onnxrt[5],
                        'attention_weights_cum_0': decoder_inputs_onnxrt[6],
                        'attention_context_0': decoder_inputs_onnxrt[7],
                        'memory': decoder_inputs_onnxrt[8],
                        'processed_memory': decoder_inputs_onnxrt[9],
                        'mask': decoder_inputs_onnxrt[10]
                    })

            mel_outputs = torch.tensor(result[0], device=device)
            mel_lengths = torch.tensor(result[1], device=device)
        else:
            print("Running Tacotron2 Decoder with loop")
            decoder_tensors = {
                "inputs": {
                    'decoder_input_0': decoder_inputs[0],
                    'attention_hidden_0': decoder_inputs[1],
                    'attention_cell_0': decoder_inputs[2],
                    'decoder_hidden_0': decoder_inputs[3],
                    'decoder_cell_0': decoder_inputs[4],
                    'attention_weights_0': decoder_inputs[5],
                    'attention_weights_cum_0': decoder_inputs[6],
                    'attention_context_0': decoder_inputs[7],
                    'memory': decoder_inputs[8],
                    'processed_memory': decoder_inputs[9],
                    'mask': decoder_inputs[10]
                },
                "outputs": {
                    'mel_outputs': mel_outputs,
                    'mel_lengths_t': mel_lengths
                }
            }

            with MeasureTime(measurements, "tacotron2_decoder_time"):
                run_trt_engine(decoder_context, decoder_iter, decoder_tensors)
            mel_outputs = mel_outputs[:, :, :torch.max(mel_lengths)]

    else:
        print("Running Tacotron2 Decoder")
        measurements_decoder = {}
        while True:
            decoder_tensors = init_decoder_tensors(decoder_inputs,
                                                   decoder_outputs)
            with MeasureTime(measurements_decoder, "step"):
                run_trt_engine(decoder_context, decoder_iter, decoder_tensors)

            if first_iter:
                mel_outputs = torch.unsqueeze(decoder_outputs[7], 2)
                gate_outputs = torch.unsqueeze(decoder_outputs[8], 2)
                alignments = torch.unsqueeze(decoder_outputs[4], 2)
                measurements['tacotron2_decoder_time'] = measurements_decoder[
                    'step']
                first_iter = False
            else:
                mel_outputs = torch.cat(
                    (mel_outputs, torch.unsqueeze(decoder_outputs[7], 2)), 2)
                gate_outputs = torch.cat(
                    (gate_outputs, torch.unsqueeze(decoder_outputs[8], 2)), 2)
                alignments = torch.cat(
                    (alignments, torch.unsqueeze(decoder_outputs[4], 2)), 2)
                measurements['tacotron2_decoder_time'] += measurements_decoder[
                    'step']

            dec = torch.le(torch.sigmoid(decoder_outputs[8]),
                           gate_threshold).to(torch.int32).squeeze(1)
            not_finished = not_finished * dec
            mel_lengths += not_finished

            if torch.sum(not_finished) == 0:
                print("Stopping after", mel_outputs.size(2), "decoder steps")
                break
            if mel_outputs.size(2) == max_decoder_steps:
                print("Warning! Reached max decoder steps")
                break

            decoder_inputs, decoder_outputs = swap_inputs_outputs(
                decoder_inputs, decoder_outputs)

    mel_outputs = mel_outputs.clone().detach()
    mel_outputs_postnet = torch.zeros_like(mel_outputs,
                                           device=device,
                                           dtype=dtype)

    postnet_tensors = {
        "inputs": {
            'mel_outputs': mel_outputs
        },
        "outputs": {
            'mel_outputs_postnet': mel_outputs_postnet
        }
    }
    print("Running Tacotron2 Postnet")
    with MeasureTime(measurements, "tacotron2_postnet_time"):
        run_trt_engine(postnet_context, postnet, postnet_tensors)

    print("Tacotron2 Postnet done")

    return mel_outputs_postnet, mel_lengths
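
init_decoder_inputs has to produce the eleven tensors named in the binding tables above, in that order. A sketch assuming Tacotron 2 defaults (80 mel channels, 1024-dim attention/decoder LSTMs, 512-dim encoder output):

import torch

def init_decoder_inputs(memory, processed_memory, memory_lengths):
    device, dtype = memory.device, memory.dtype
    bs, seq_len = memory.size(0), memory.size(1)

    def zeros(*shape):
        return torch.zeros(bs, *shape, device=device, dtype=dtype)

    # padding positions are True in the mask
    mask = torch.arange(seq_len, device=device)[None, :] >= memory_lengths[:, None]
    return (zeros(80),         # decoder_input_0 (all-zero <GO> frame)
            zeros(1024),       # attention_hidden_0
            zeros(1024),       # attention_cell_0
            zeros(1024),       # decoder_hidden_0
            zeros(1024),       # decoder_cell_0
            zeros(seq_len),    # attention_weights_0
            zeros(seq_len),    # attention_weights_cum_0
            zeros(512),        # attention_context_0
            memory,            # memory
            processed_memory,  # processed_memory
            mask)              # mask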
Example No. 10
def main():
    """
    Launches text to speech (inference).
    Inference is executed on a single GPU.
    """
    parser = argparse.ArgumentParser(
        description='PyTorch Tacotron 2 Inference')
    parser = parse_args(parser)
    args, unknown_args = parser.parse_known_args()

    DLLogger.init(backends=[
        JSONStreamBackend(Verbosity.DEFAULT, args.log_file),
        StdOutBackend(Verbosity.VERBOSE)
    ])
    for k, v in vars(args).items():
        DLLogger.log(step="PARAMETER", data={k: v})
    DLLogger.log(step="PARAMETER", data={'model_name': 'Tacotron2_PyT'})

    measurements_all = {
        "pre_processing": [],
        "tacotron2_encoder_time": [],
        "tacotron2_decoder_time": [],
        "tacotron2_postnet_time": [],
        "tacotron2_latency": [],
        "waveglow_latency": [],
        "latency": [],
        "type_conversion": [],
        "data_transfer": [],
        "storage": [],
        "tacotron2_items_per_sec": [],
        "waveglow_items_per_sec": [],
        "num_mels_per_audio": [],
        "throughput": []
    }

    print("args:", args, unknown_args)

    torch.cuda.init()

    TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
    encoder = load_engine(args.encoder, TRT_LOGGER)
    decoder_iter = load_engine(args.decoder, TRT_LOGGER)
    postnet = load_engine(args.postnet, TRT_LOGGER)
    waveglow = load_engine(args.waveglow, TRT_LOGGER)

    if args.waveglow_ckpt != "":
        # setup denoiser using WaveGlow PyTorch checkpoint
        waveglow_ckpt = load_and_setup_model('WaveGlow',
                                             parser,
                                             args.waveglow_ckpt,
                                             fp16_run=args.fp16,
                                             cpu_run=False,
                                             forward_is_infer=True)
        denoiser = Denoiser(waveglow_ckpt).cuda()
        # after initialization, we don't need WaveGlow PyTorch checkpoint
        # anymore - deleting
        del waveglow_ckpt
        torch.cuda.empty_cache()

    # create TRT contexts for each engine
    encoder_context = encoder.create_execution_context()
    decoder_context = decoder_iter.create_execution_context()
    postnet_context = postnet.create_execution_context()
    waveglow_context = waveglow.create_execution_context()

    texts = [
        "The forms of printed letters should be beautiful, and that their arrangement on the page should be reasonable and a help to the shapeliness of the letters themselves. The forms of printed letters should be beautiful, and that their arrangement on the page should be reasonable and a help to the shapeliness of the letters themselves."
    ]
    texts = [texts[0][:args.input_length]]
    texts = texts * args.batch_size

    warmup_iters = 3

    for iter in range(args.num_iters):

        measurements = {}

        with MeasureTime(measurements, "pre_processing"):
            sequences_padded, input_lengths = prepare_input_sequence(texts)
            sequences_padded = sequences_padded.to(torch.int32)
            input_lengths = input_lengths.to(torch.int32)

        with torch.no_grad():
            with MeasureTime(measurements, "latency"):
                with MeasureTime(measurements, "tacotron2_latency"):
                    mel, mel_lengths = infer_tacotron2_trt(
                        encoder, decoder_iter, postnet, encoder_context,
                        decoder_context, postnet_context, sequences_padded,
                        input_lengths, measurements, args.fp16)

                with MeasureTime(measurements, "waveglow_latency"):
                    audios = infer_waveglow_trt(waveglow, waveglow_context,
                                                mel, measurements, args.fp16)

        num_mels = mel.size(0) * mel.size(2)
        num_samples = audios.size(0) * audios.size(1)

        with MeasureTime(measurements, "type_conversion"):
            audios = audios.float()

        with MeasureTime(measurements, "data_transfer"):
            audios = audios.cpu()

        with MeasureTime(measurements, "storage"):
            audios = audios.numpy()
            for i, audio in enumerate(audios):
                audio_path = "audio_" + str(i) + ".wav"
                write(audio_path, args.sampling_rate,
                      audio[:mel_lengths[i] * args.stft_hop_length])

        measurements['tacotron2_items_per_sec'] = num_mels / measurements[
            'tacotron2_latency']
        measurements['waveglow_items_per_sec'] = num_samples / measurements[
            'waveglow_latency']
        measurements['num_mels_per_audio'] = mel.size(2)
        measurements['throughput'] = num_samples / measurements['latency']

        if iter >= warmup_iters:
            for k, v in measurements.items():
                if k in measurements_all.keys():
                    measurements_all[k].append(v)
                    DLLogger.log(step=(iter - warmup_iters), data={k: v})

    DLLogger.flush()

    print_stats(measurements_all)
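
print_stats summarizes the per-iteration lists gathered in measurements_all after warmup. A minimal sketch of the intent (the repository's formatting may differ):

import numpy as np

def print_stats(measurements_all):
    # Report mean and standard deviation of every non-empty measurement list.
    for key, values in measurements_all.items():
        if values:
            print(f"{key}: mean {np.mean(values):.4f}  std {np.std(values):.4f}")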
Example No. 11
def main():
    """
    Launches inference benchmark.
    Inference is executed on a single GPU.
    """
    parser = argparse.ArgumentParser(
        description='PyTorch Tacotron 2 Inference')
    parser = parse_args(parser)
    args, _ = parser.parse_known_args()

    log_file = args.log_file

    LOGGER.set_model_name("Tacotron2_PyT")
    LOGGER.set_backends([
        dllg.StdOutBackend(log_file=None,
                           logging_scope=dllg.TRAIN_ITER_SCOPE,
                           iteration_interval=1),
        dllg.JsonBackend(log_file,
                         logging_scope=dllg.TRAIN_ITER_SCOPE,
                         iteration_interval=1)
    ])
    LOGGER.register_metric("items_per_sec", metric_scope=dllg.TRAIN_ITER_SCOPE)
    LOGGER.register_metric("latency", metric_scope=dllg.TRAIN_ITER_SCOPE)

    log_hardware()
    log_args(args)

    model = load_and_setup_model(args.model_name, parser, None, args.amp_run)

    warmup_iters = 3
    num_iters = 1 + warmup_iters

    for i in range(num_iters):
        if i >= warmup_iters:
            LOGGER.iteration_start()

        measurements = {}

        if args.model_name == 'Tacotron2':
            text_padded = torch.randint(low=0,
                                        high=148,
                                        size=(args.batch_size, 140),
                                        dtype=torch.long).cuda()
            input_lengths = torch.IntTensor([text_padded.size(1)] *
                                            args.batch_size).cuda().long()
            with torch.no_grad(), MeasureTime(measurements, "inference_time"):
                mels, _ = model.infer(text_padded, input_lengths)
            num_items = mels.size(0) * mels.size(2)

        if args.model_name == 'WaveGlow':
            n_mel_channels = model.upsample.in_channels
            num_mels = 895
            mel_padded = torch.zeros(args.batch_size, n_mel_channels,
                                     num_mels).normal_(-5.62, 1.98).cuda()
            if args.amp_run:
                mel_padded = mel_padded.half()

            with torch.no_grad(), MeasureTime(measurements, "inference_time"):
                audios = model.infer(mel_padded)
                audios = audios.float()
            num_items = audios.size(0) * audios.size(1)

        if i >= warmup_iters:
            LOGGER.log(key="items_per_sec",
                       value=(num_items / measurements['inference_time']))
            LOGGER.log(key="latency", value=measurements['inference_time'])
            LOGGER.iteration_stop()

    LOGGER.finish()
Example No. 12
def main():
    """
    Launches inference benchmark.
    Inference is executed on a single GPU.
    """
    parser = argparse.ArgumentParser(
        description='PyTorch Tacotron 2 Inference')
    parser = parse_args(parser)
    args, _ = parser.parse_known_args()

    log_file = os.path.join(args.output, args.log_file)

    DLLogger.init(backends=[
        JSONStreamBackend(Verbosity.DEFAULT, log_file),
        StdOutBackend(Verbosity.VERBOSE)
    ])
    for k, v in vars(args).items():
        DLLogger.log(step="PARAMETER", data={k: v})
    DLLogger.log(step="PARAMETER", data={'model_name': 'Tacotron2_PyT'})

    if args.synth_data:
        model = load_and_setup_model(args.model_name,
                                     parser,
                                     None,
                                     args.fp16,
                                     cpu_run=False,
                                     forward_is_infer=True)
    else:
        if not os.path.isfile(args.model):
            print(f"File {args.model} does not exist!")
            sys.exit(1)
        model = load_and_setup_model(args.model_name,
                                     parser,
                                     args.model,
                                     args.fp16,
                                     cpu_run=False,
                                     forward_is_infer=True)

    if args.model_name == "Tacotron2":
        model = torch.jit.script(model)

    warmup_iters = 3
    num_iters = 1 + warmup_iters

    for i in range(num_iters):

        measurements = {}

        if args.model_name == 'Tacotron2':
            text_padded, input_lengths = gen_text(args.synth_data)

            with torch.no_grad(), MeasureTime(measurements, "inference_time"):
                mels, _, _ = model(text_padded, input_lengths)
            num_items = mels.size(0) * mels.size(2)

        if args.model_name == 'WaveGlow':

            n_mel_channels = model.upsample.in_channels
            mel_padded = gen_mel(args.synth_data, n_mel_channels, args.fp16)

            with torch.no_grad(), MeasureTime(measurements, "inference_time"):
                audios = model(mel_padded)
                audios = audios.float()
            num_items = audios.size(0) * audios.size(1)

        if i >= warmup_iters:
            DLLogger.log(step=(i - warmup_iters, ),
                         data={"latency": measurements['inference_time']})
            DLLogger.log(step=(i - warmup_iters, ),
                         data={
                             "items_per_sec":
                             num_items / measurements['inference_time']
                         })

    DLLogger.log(step=tuple(),
                 data={'infer_latency': measurements['inference_time']})
    DLLogger.log(step=tuple(),
                 data={
                     'infer_items_per_sec':
                     num_items / measurements['inference_time']
                 })

    DLLogger.flush()
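
gen_text and gen_mel abstract the synthetic-versus-real input choice. Judging from Examples No. 8 and 11, the synthetic paths look roughly like this; the shapes and the log-mel statistics (-5.62, 1.98) are taken from those examples, while the real-data path is repository-specific and left out:

import torch

def gen_text(use_synthetic_data, batch_size=1):
    # Sketch: random token ids shaped like Example No. 8's Tacotron2 input.
    assert use_synthetic_data, "real-data path depends on repository tooling"
    text_padded = torch.randint(low=0, high=148, size=(batch_size, 140),
                                dtype=torch.long).cuda()
    input_lengths = torch.IntTensor([text_padded.size(1)] * batch_size).cuda().long()
    return text_padded, input_lengths

def gen_mel(use_synthetic_data, n_mel_channels, fp16, batch_size=1, num_mels=895):
    # Sketch: synthetic log-mel frames with the statistics noted above.
    assert use_synthetic_data, "real-data path depends on repository tooling"
    mel = torch.zeros(batch_size, n_mel_channels, num_mels).normal_(-5.62, 1.98).cuda()
    return mel.half() if fp16 else mel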