def recognize(args):
    """Decode every utterance in ``args.recog_json`` with a trained
    Transformer, rescoring with a kenlm language model, and write the
    n-best hypotheses as JSON to ``args.result_label``.

    Args:
        args: namespace providing at least ``model_path``, ``dict``,
            ``recog_json``, ``lm_path`` and ``result_label``, plus the
            decoding options consumed by ``model.recognize``.
    """
    model, LFR_m, LFR_n = Transformer.load_model(args.model_path)
    print(model)
    model.eval()
    model.cuda()

    char_list, sos_id, eos_id = process_dict(args.dict)
    # The checkpoint must have been trained against the same dictionary.
    assert model.decoder.sos_id == sos_id and model.decoder.eos_id == eos_id

    # read json data
    with open(args.recog_json, 'rb') as f:
        js = json.load(f)['utts']

    # import Language Model
    lm_model = kenlm.Model(args.lm_path)

    # decode each utterance
    new_js = {}
    with torch.no_grad():
        for idx, name in enumerate(js.keys(), 1):
            print('(%d/%d) decoding %s' % (idx, len(js.keys()), name),
                  flush=True)
            # renamed from ``input`` to avoid shadowing the builtin
            feats = kaldi_io.read_mat(js[name]['input'][0]['feat'])  # TxD
            feats = build_LFR_features(feats, LFR_m, LFR_n)
            feats = torch.from_numpy(feats).float()
            feats_length = torch.tensor([feats.size(0)], dtype=torch.int)
            feats = feats.cuda()
            feats_length = feats_length.cuda()
            nbest_hyps = model.recognize(feats, feats_length, char_list,
                                         lm_model, args)
            new_js[name] = add_results_to_json(js[name], nbest_hyps,
                                               char_list)

    with open(args.result_label, 'wb') as f:
        f.write(
            json.dumps({
                'utts': new_js
            }, indent=4, sort_keys=True).encode('utf_8'))
def recognize(args):
    """Build a Transformer from ``args``, load weights from
    ``args.model_path`` (OneFlow), decode every utterance in
    ``args.recog_json`` and write the n-best results as JSON to
    ``args.result_label``.

    Args:
        args: namespace carrying the model hyper-parameters
            (``d_input``, ``n_layers_enc`` …), ``dict``, ``model_path``,
            ``recog_json`` and ``result_label``, plus the decoding
            options consumed by ``model.recognize``.
    """
    # vocabulary / special symbols — computed once and reused below
    # (the original called process_dict twice with identical results)
    char_list, sos_id, eos_id = process_dict(args.dict)
    vocab_size = len(char_list)

    encoder = Encoder(
        args.d_input * args.LFR_m,
        args.n_layers_enc,
        args.n_head,
        args.d_k,
        args.d_v,
        args.d_model,
        args.d_inner,
        dropout=args.dropout,
        pe_maxlen=args.pe_maxlen,
    )
    decoder = Decoder(
        sos_id,
        eos_id,
        vocab_size,
        args.d_word_vec,
        args.n_layers_dec,
        args.n_head,
        args.d_k,
        args.d_v,
        args.d_model,
        args.d_inner,
        dropout=args.dropout,
        tgt_emb_prj_weight_sharing=args.tgt_emb_prj_weight_sharing,
        pe_maxlen=args.pe_maxlen,
    )
    model = Transformer(encoder, decoder)
    model.load_state_dict(flow.load(args.model_path))

    device = flow.device("cuda")
    model.eval()
    model.to(device)

    LFR_m = args.LFR_m
    LFR_n = args.LFR_n
    # The checkpoint must have been trained against the same dictionary.
    assert model.decoder.sos_id == sos_id and model.decoder.eos_id == eos_id

    # read json data
    with open(args.recog_json, "rb") as f:
        js = json.load(f)["utts"]

    # decode each utterance
    new_js = {}
    with flow.no_grad():
        for idx, name in enumerate(js.keys(), 1):
            print("(%d/%d) decoding %s" % (idx, len(js.keys()), name),
                  flush=True)
            # renamed from ``input`` to avoid shadowing the builtin
            feats = kaldi_io.read_mat(js[name]["input"][0]["feat"])
            feats = build_LFR_features(feats, LFR_m, LFR_n)
            feats = flow.tensor(feats).to(dtype=flow.float32)
            feats_length = flow.tensor([feats.size(0)], dtype=flow.int64)
            feats = feats.to(device)
            feats_length = feats_length.to(device)
            nbest_hyps = model.recognize(feats, feats_length, char_list, args)
            new_js[name] = add_results_to_json(js[name], nbest_hyps,
                                               char_list)

    with open(args.result_label, "wb") as f:
        f.write(json.dumps({"utts": new_js}, indent=4,
                           sort_keys=True).encode("utf_8"))
def recognize(args):
    """Decode every utterance in ``args.recog_json`` with a trained
    Transformer and write the n-best hypotheses as non-ASCII-preserving
    JSON (``ensure_ascii=False``) to ``args.result_label``.

    Args:
        args: namespace providing at least ``model_path``, ``dict``,
            ``recog_json`` and ``result_label``, plus the decoding
            options consumed by ``model.recognize``.
    """
    model, LFR_m, LFR_n = Transformer.load_model(args.model_path, args)
    print(model)
    model.eval()
    model.cuda()

    char_list, sos_id, eos_id = process_dict(args.dict)
    # The checkpoint must have been trained against the same dictionary.
    assert model.decoder.sos_id == sos_id and model.decoder.eos_id == eos_id

    # read json data
    with open(args.recog_json, 'r', encoding='utf-8') as f:
        js = json.load(f)['utts']

    # decode each utterance
    new_js = {}
    with torch.no_grad():
        for idx, name in enumerate(js.keys(), 1):
            print('(%d/%d) decoding %s' % (idx, len(js.keys()), name),
                  flush=True)
            # renamed from ``input`` to avoid shadowing the builtin
            feats = kaldi_io.read_mat(js[name]['input'][0]['feat'])  # TxD
            feats = build_LFR_features(feats, LFR_m, LFR_n)
            feats = torch.from_numpy(feats).float()
            feats_length = torch.tensor([feats.size(0)], dtype=torch.int)
            feats = feats.cuda()
            feats_length = feats_length.cuda()
            nbest_hyps = model.recognize(feats, feats_length, char_list, args)
            new_js[name] = add_results_to_json(js[name], nbest_hyps,
                                               char_list)

    with open(args.result_label, 'wb') as f:
        f.write(
            json.dumps({
                'utts': new_js
            }, indent=4, sort_keys=True, ensure_ascii=False).encode('utf_8'))
def recognize(args):
    """Decode every utterance in ``args.recog_json`` with a trained
    Seq2Seq model — optionally alignment-constrained via
    ``args.align_trun`` — and write the n-best hypotheses as JSON to
    ``args.result_label``.

    Args:
        args: namespace providing at least ``model_path``, ``dict``,
            ``recog_json``, ``result_label`` and ``align_trun``, plus
            the decoding options consumed by ``model.recognize`` /
            ``model.recognize_align``.
    """
    # process_dict must run before load_model: args.char_list feeds it.
    # (The original re-ran process_dict after loading; that second call
    # was redundant — the results are identical.)
    char_list, sos_id, eos_id = process_dict(args.dict)
    args.char_list = char_list
    model, LFR_m, LFR_n = Seq2Seq.load_model(args.model_path, args)
    print(model)
    model.eval()
    model.cuda()

    # The checkpoint must have been trained against the same dictionary.
    assert model.decoder.sos_id == sos_id and model.decoder.eos_id == eos_id

    # read json data
    with open(args.recog_json, 'rb') as f:
        js = json.load(f)['utts']

    # decode each utterance
    new_js = {}
    with torch.no_grad():
        for idx, name in enumerate(js.keys(), 1):
            print('(%d/%d) decoding %s' % (idx, len(js.keys()), name),
                  flush=True)
            # renamed from ``input`` to avoid shadowing the builtin
            feats = kaldi_io.read_mat(js[name]['input'][0]['feat'])  # TxD
            feats = build_LFR_features(feats, LFR_m, LFR_n)
            feats = torch.from_numpy(feats).float()
            feats_length = torch.tensor([feats.size(0)], dtype=torch.int)
            feats = feats.cuda()
            feats_length = feats_length.cuda()
            if args.align_trun:
                # CTC alignment ids accompany the utterance; constrain
                # decoding to them.
                align = (js[name]['output'][0]['ctcid'].split())
                nbest_hyps = model.recognize_align(feats, feats_length,
                                                   char_list, align, args)
            else:
                nbest_hyps = model.recognize(feats, feats_length,
                                             char_list, args)
            new_js[name] = add_results_to_json(js[name], nbest_hyps,
                                               char_list)

    with open(args.result_label, 'wb') as f:
        f.write(
            json.dumps({
                'utts': new_js
            }, indent=4, sort_keys=True).encode('utf_8'))