Пример #1
0
def main(cfg: ServerConfig):
    """Initialise the transcription server.

    Loads the comma-restoration model, three DeepSpeech acoustic models,
    a decoder and a spectrogram parser into module-level globals, then
    starts the Flask app.

    Args:
        cfg: Server configuration (host/port, model and LM settings).
    """
    global model, spect_parser, decoder, config, device, model2, model3
    global commo_model, dict_data, word_dict, char_dict
    commo_model, dict_data, word_dict, char_dict = transcribe_comma.loadModel()
    config = cfg
    # TODO(review): model paths are hard-coded; consider moving them into cfg.
    model1Path = '/work/Source/deepspeech.pytorch/models/deepspeech_50_1600_gru_fpt.pth'
    logging.info('Setting up server...')
    device = torch.device("cuda" if cfg.model.cuda else "cpu")
    model = load_model(device=device,
                       model_path=model1Path,
                       use_half=cfg.model.use_half)
    logging.info('Loaded model 1')
    model2Path = '/work/Source/deepspeech.pytorch/models/deepspeech_1600_lstm_16_50_vin.pth'
    model2 = load_model(device=device,
                        model_path=model2Path,
                        use_half=cfg.model.use_half)
    logging.info('Loaded model 2')
    model3Path = '/work/Source/deepspeech.pytorch/models/deepspeech_1600_vinfpt_25_50.pth'
    model3 = load_model(device=device,
                        model_path=model3Path,
                        use_half=cfg.model.use_half)
    logging.info('Loaded model 3')
    decoder = load_decoder(labels=model.labels, cfg=cfg.lm)
    # Fix: the parser was previously constructed twice with identical
    # arguments; a single construction is sufficient.
    spect_parser = SpectrogramParser(audio_conf=model.audio_conf,
                                     normalize=True)
    logging.info('Server initialised')
    app.run(host=cfg.host, port=cfg.port, debug=False, use_reloader=False)
Пример #2
0
def main(cfg: ServerConfig):
    """Initialise and run the transcription server described by *cfg*.

    Populates the module-level globals (model, decoder, parser) and then
    serves the Flask app with waitress.

    Args:
        cfg: Server configuration (host/port, model and LM settings).
    """
    global model, spect_parser, decoder, config, device
    config = cfg
    logging.getLogger().setLevel(logging.DEBUG)

    logging.info('Setting up server...')
    device = torch.device("cuda" if cfg.model.cuda else "cpu")

    model = load_model(device=device,
                       model_path=cfg.model.model_path,
                       use_half=cfg.model.use_half)

    decoder = load_decoder(
        "beam" if cfg.lm.decoder_type == DecoderType.beam else "greedy",
        model.labels, cfg.lm.lm_path, cfg.lm.alpha, cfg.lm.beta,
        cfg.lm.cutoff_top_n, cfg.lm.cutoff_prob, cfg.lm.beam_width,
        cfg.lm.lm_workers)

    # Fix: SpectrogramParser was previously constructed twice with
    # identical arguments; a single construction is sufficient.
    spect_parser = SpectrogramParser(audio_conf=model.audio_conf,
                                     normalize=True)
    logging.info('Server initialised')

    serve(app, host=cfg.host, port=cfg.port)
Пример #3
0
def transcribe(cfg: TranscribeConfig):
    """Transcribe the audio file named in *cfg*, printing the elapsed time
    and the decoded results as JSON."""
    device = torch.device("cuda" if cfg.model.cuda else "cpu")

    model = load_model(device=device,
                       model_path=cfg.model.model_path,
                       use_half=cfg.model.use_half)

    # Select the decoder implementation according to the LM configuration.
    decoder_name = "beam" if cfg.lm.decoder_type == DecoderType.beam else "greedy"
    decoder = load_decoder(decoder_name, model.labels, cfg.lm.lm_path,
                           cfg.lm.alpha, cfg.lm.beta, cfg.lm.cutoff_top_n,
                           cfg.lm.cutoff_prob, cfg.lm.beam_width,
                           cfg.lm.lm_workers)

    parser = SpectrogramParser(audio_conf=model.audio_conf, normalize=True)

    t0 = time.time()
    hyps, offsets = run_transcribe(audio_path=cfg.audio_path,
                                   spect_parser=parser,
                                   model=model,
                                   decoder=decoder,
                                   device=device,
                                   use_half=cfg.model.use_half)
    results = decode_results(decoded_output=hyps,
                             decoded_offsets=offsets,
                             cfg=cfg)
    t1 = time.time()

    print("Time taken: {}".format(t1 - t0))
    print(json.dumps(results, ensure_ascii=False))
Пример #4
0
def run_transcribe(audio_path: str, spect_parser: SpectrogramParser,
                   model: DeepSpeech, decoder: Decoder, device: torch.device,
                   use_half: bool):
    """Run the acoustic model on one audio file and decode the output with
    both the configured decoder and a greedy decoder.

    Args:
        audio_path: Path of the audio file to transcribe.
        spect_parser: Converts the audio file into a spectrogram tensor.
        model: Acoustic model producing per-frame label probabilities.
        decoder: Primary decoder (e.g. beam search) applied to the output.
        device: Device the spectrogram is moved to before inference.
        use_half: If True, cast the input tensor to float16.

    Returns:
        Tuple of (primary transcripts, greedy transcripts,
        primary offsets, greedy offsets).
    """
    spect = spect_parser.parse_audio(audio_path).contiguous()
    # Reshape to (batch=1, channel=1, freq, time) as the model expects.
    spect = spect.view(1, 1, spect.size(0), spect.size(1))
    spect = spect.to(device)
    if use_half:
        spect = spect.half()
    input_sizes = torch.IntTensor([spect.size(3)]).int()
    out, output_sizes = model(spect, input_sizes)
    decoded_output, decoded_offsets = decoder.decode(out, output_sizes)

    # Also decode greedily, for comparison against the primary decoder.
    decoder2 = GreedyDecoder(labels=model.labels,
                             blank_index=model.labels.index('_'))
    decoded_output2, decoded_offsets2 = decoder2.decode(out, output_sizes)

    return decoded_output, decoded_output2, decoded_offsets, decoded_offsets2
Пример #5
0
def main(cfg: ServerConfig):
    """Initialise the transcription server globals and run the Flask app.

    Args:
        cfg: Server configuration (host/port, model and LM settings).
    """
    global model, spect_parser, decoder, config, device
    config = cfg
    logging.getLogger().setLevel(logging.DEBUG)

    logging.info('Setting up server...')
    device = torch.device("cuda" if cfg.model.cuda else "cpu")

    model = load_model(device=device, model_path=cfg.model.model_path)

    decoder = load_decoder(labels=model.labels, cfg=cfg.lm)

    # Fix: a first parser built from model.audio_conf was immediately
    # overwritten; only the model.spect_cfg-based parser is kept.
    spect_parser = SpectrogramParser(audio_conf=model.spect_cfg,
                                     normalize=True)
    logging.info('Server initialised')
    # NOTE(review): debug=True is unsafe for production deployments.
    app.run(host=cfg.host, port=cfg.port, debug=True, use_reloader=False)
Пример #6
0
def run_transcribe(audio_path: str, spect_parser: SpectrogramParser,
                   model: DeepSpeech, decoder: Decoder, device: torch.device,
                   precision: int):
    """Transcribe a single audio file, with autocast enabled when
    *precision* is 16, and return (transcripts, offsets)."""
    features = spect_parser.parse_audio(audio_path).contiguous()
    # The model consumes a 4-D (batch, channel, freq, time) tensor.
    features = features.view(1, 1, features.size(0), features.size(1)).to(device)
    lengths = torch.IntTensor([features.size(3)]).int()
    use_amp = precision == 16
    with autocast(enabled=use_amp):
        out, out_sizes = model(features, lengths)
    transcripts, offsets = decoder.decode(out, out_sizes)
    return transcripts, offsets
Пример #7
0
def run_transcribe(audio_path: str, spect_parser: SpectrogramParser,
                   model: DeepSpeech, decoder: Decoder, device: torch.device,
                   use_half: bool):
    """Transcribe one audio file, optionally in half precision, and
    return (transcripts, offsets)."""
    features = spect_parser.parse_audio(audio_path).contiguous()
    # The model consumes a 4-D (batch, channel, freq, time) tensor.
    features = features.view(1, 1, features.size(0), features.size(1))
    features = features.to(device)
    if use_half:
        features = features.half()
    lengths = torch.IntTensor([features.size(3)]).int()
    out, out_sizes = model(features, lengths)
    transcripts, offsets = decoder.decode(out, out_sizes)
    return transcripts, offsets
Пример #8
0
def transcribe(cfg: TranscribeConfig):
    """Transcribe cfg.audio_path with both the configured and greedy
    decoders, restore punctuation via the comma model, and return the
    two transcriptions plus result metadata.

    Returns:
        (primary transcription, greedy transcription, results['_meta']).
    """
    commo_model, dict_data, word_dict, char_dict = transcribe_comma.loadModel()
    device = torch.device("cuda" if cfg.model.cuda else "cpu")

    model = load_model(device=device,
                       model_path=cfg.model.model_path,
                       use_half=cfg.model.use_half)

    decoder = load_decoder(labels=model.labels, cfg=cfg.lm)

    spect_parser = SpectrogramParser(audio_conf=model.audio_conf,
                                     normalize=True)

    # With beam search, decoded_output is a (1 x beam_width) array whose
    # elements are candidate sentences, e.g. 512 candidates for
    # beam_width=512.

    tim1 = time.time()
    decoded_output, decoded_outputGreedy, decoded_offsets, decoded_offsets2 = run_transcribe(
        audio_path=cfg.audio_path,
        spect_parser=spect_parser,
        model=model,
        decoder=decoder,
        device=device,
        use_half=cfg.model.use_half)
    results = decode_results(decoded_output=decoded_output,
                             decoded_offsets=decoded_offsets,
                             cfg=cfg)
    results2 = decode_results(decoded_output=decoded_outputGreedy,
                              decoded_offsets=decoded_offsets2,
                              cfg=cfg)

    tim2 = time.time()
    print("Audio transcribe cost : " + str(tim2 - tim1))
    # Restore punctuation on the best hypothesis of each decoder.
    results['output'][0]['transcription'] = transcribe_comma.runTranscribe(
        commo_model, dict_data, word_dict, char_dict,
        results['output'][0]['transcription'])
    results2['output'][0]['transcription'] = transcribe_comma.runTranscribe(
        commo_model, dict_data, word_dict, char_dict,
        results2['output'][0]['transcription'])

    return results['output'][0]['transcription'], results2['output'][0][
        'transcription'], results['_meta']
Пример #9
0
def main():
    """Parse command-line options, load the model and decoder into the
    module-level globals, and launch the Flask transcription server."""
    import argparse
    global model, spect_parser, decoder, args, device

    cli = argparse.ArgumentParser(
        description='DeepSpeech transcription server')
    cli.add_argument('--host',
                     type=str,
                     default='0.0.0.0',
                     help='Host to be used by the server')
    cli.add_argument('--port',
                     type=int,
                     default=8888,
                     help='Port to be used by the server')
    cli = add_inference_args(cli)
    cli = add_decoder_args(cli)
    args = cli.parse_args()
    logging.getLogger().setLevel(logging.DEBUG)

    logging.info('Setting up server...')
    torch.set_grad_enabled(False)  # inference only
    device = torch.device("cuda" if args.cuda else "cpu")
    model = load_model(device, args.model_path, args.half)

    if args.decoder != "beam":
        decoder = GreedyDecoder(model.labels,
                                blank_index=model.labels.index('_'))
    else:
        # Imported lazily so the beam-search dependency is only needed
        # when that decoder is actually selected.
        from deepspeech_pytorch.decoder import BeamCTCDecoder

        decoder = BeamCTCDecoder(model.labels,
                                 lm_path=args.lm_path,
                                 alpha=args.alpha,
                                 beta=args.beta,
                                 cutoff_top_n=args.cutoff_top_n,
                                 cutoff_prob=args.cutoff_prob,
                                 beam_width=args.beam_width,
                                 num_processes=args.lm_workers)

    spect_parser = SpectrogramParser(model.audio_conf, normalize=True)
    logging.info('Server initialised')
    app.run(host=args.host, port=args.port, debug=True, use_reloader=False)
Пример #10
0
def transcribe(cfg: TranscribeConfig):
    """Transcribe cfg.audio_path and print the decoded results as JSON."""
    device = torch.device("cuda" if cfg.model.cuda else "cpu")

    model = load_model(device=device, model_path=cfg.model.model_path)
    decoder = load_decoder(labels=model.labels, cfg=cfg.lm)
    parser = SpectrogramParser(audio_conf=model.spect_cfg, normalize=True)

    # Resolve the path against the original working directory (Hydra
    # changes the CWD at runtime).
    audio = hydra.utils.to_absolute_path(cfg.audio_path)
    hyps, offsets = run_transcribe(audio_path=audio,
                                   spect_parser=parser,
                                   model=model,
                                   decoder=decoder,
                                   device=device,
                                   precision=cfg.model.precision)
    results = decode_results(decoded_output=hyps,
                             decoded_offsets=offsets,
                             cfg=cfg)
    print(json.dumps(results))
Пример #11
0
def transcribe(cfg: TranscribeConfig):
    """Transcribe cfg.audio_path and print the decoded results as JSON."""
    device = torch.device("cuda" if cfg.model.cuda else "cpu")

    model = load_model(device=device,
                       model_path=cfg.model.model_path,
                       use_half=cfg.model.use_half)
    decoder = load_decoder(labels=model.labels, cfg=cfg.lm)
    parser = SpectrogramParser(audio_conf=model.audio_conf, normalize=True)

    hyps, offsets = run_transcribe(audio_path=cfg.audio_path,
                                   spect_parser=parser,
                                   model=model,
                                   decoder=decoder,
                                   device=device,
                                   use_half=cfg.model.use_half)
    results = decode_results(decoded_output=hyps,
                             decoded_offsets=offsets,
                             cfg=cfg)
    print(json.dumps(results))
Пример #12
0
    arg_parser = add_decoder_args(arg_parser)
    args = arg_parser.parse_args()
    device = torch.device("cuda" if args.cuda else "cpu")
    model = load_model(device, args.model_path, args.half)

    decoder = load_decoder(decoder_type=args.decoder,
                           labels=model.labels,
                           lm_path=args.lm_path,
                           alpha=args.alpha,
                           beta=args.beta,
                           cutoff_top_n=args.cutoff_top_n,
                           cutoff_prob=args.cutoff_prob,
                           beam_width=args.beam_width,
                           lm_workers=args.lm_workers)

    spect_parser = SpectrogramParser(audio_conf=model.audio_conf,
                                     normalize=True)

    vad = webrtcvad.Vad()
    vad.set_mode(3)

    chunk = 320
    data_format = pyaudio.paInt16
    channels = 1
    sample_rate = 16000
    record_seconds = 10000

    p = pyaudio.PyAudio()

    stream = p.open(format=data_format,
                    channels=channels,
                    rate=sample_rate,