def recog(args):
    '''Run recognition

    Builds the E2E model described by the training config, loads its trained
    parameters, optionally attaches a character-level and/or word-level RNNLM,
    decodes every utterance listed in ``args.recog_json`` and writes the
    collected results as JSON to ``args.result_label``.

    :param args: option namespace; this function reads ``seed``, ``model``,
        ``model_conf``, ``rnnlm``, ``rnnlm_conf``, ``word_rnnlm``,
        ``word_dict``, ``recog_json`` and ``result_label`` from it and passes
        the whole object on to ``e2e.recognize``
    :raises SystemExit: (exit status 1) when ``args.word_rnnlm`` is set but
        ``args.word_dict`` is not
    '''
    # seed setting
    torch.manual_seed(args.seed)

    # read training config
    idim, odim, train_args = get_model_conf(args.model, args.model_conf)

    # load trained model parameters
    logging.info('reading model parameters from %s', args.model)
    e2e = E2E(idim, odim, train_args)
    # the parameters are loaded into the Loss wrapper built around e2e;
    # decoding below calls e2e directly
    model = Loss(e2e, train_args.mtlalpha)
    torch_load(args.model, model)

    # read rnnlm
    if args.rnnlm:
        rnnlm_args = get_model_conf(args.rnnlm, args.rnnlm_conf)
        rnnlm = lm_pytorch.ClassifierWithState(
            lm_pytorch.RNNLM(len(train_args.char_list), rnnlm_args.unit))
        torch_load(args.rnnlm, rnnlm)
        rnnlm.eval()
    else:
        rnnlm = None

    if args.word_rnnlm:
        if not args.word_dict:
            logging.error(
                'word dictionary file is not specified for the word RNNLM.')
            sys.exit(1)

        # NOTE(review): the word RNNLM config is resolved with
        # args.rnnlm_conf, the same option used for the character RNNLM above
        # -- confirm this is intended.
        rnnlm_args = get_model_conf(args.word_rnnlm, args.rnnlm_conf)
        word_dict = load_labeldict(args.word_dict)
        char_dict = {x: i for i, x in enumerate(train_args.char_list)}
        word_rnnlm = lm_pytorch.ClassifierWithState(
            lm_pytorch.RNNLM(len(word_dict), rnnlm_args.unit))
        torch_load(args.word_rnnlm, word_rnnlm)
        word_rnnlm.eval()

        if rnnlm is not None:
            # both LMs given: combine the word and character predictors
            rnnlm = lm_pytorch.ClassifierWithState(
                extlm_pytorch.MultiLevelLM(word_rnnlm.predictor,
                                           rnnlm.predictor,
                                           word_dict, char_dict))
        else:
            # word LM only
            rnnlm = lm_pytorch.ClassifierWithState(
                extlm_pytorch.LookAheadWordLM(word_rnnlm.predictor,
                                              word_dict, char_dict))

    # read json data
    with open(args.recog_json, 'rb') as f:
        js = json.load(f)['utts']

    # decode each utterance
    new_js = {}
    n_utts = len(js)
    with torch.no_grad():
        for idx, name in enumerate(js, 1):
            # pass the utterance name as a logging argument so a '%' in the
            # name cannot corrupt the format string
            logging.info('(%d/%d) decoding %s', idx, n_utts, name)
            feat = kaldi_io_py.read_mat(js[name]['input'][0]['feat'])
            nbest_hyps = e2e.recognize(feat, args, train_args.char_list, rnnlm)
            new_js[name] = add_results_to_json(js[name], nbest_hyps,
                                               train_args.char_list)

    # TODO(watanabe) fix character coding problems when saving it
    with open(args.result_label, 'wb') as f:
        f.write(json.dumps({'utts': new_js}, indent=4,
                           sort_keys=True).encode('utf_8'))
def recog(args):
    '''Run recognition

    Legacy PyTorch decoding entry point: reads the pickled training config,
    restores the E2E model on CPU, optionally attaches a character-level
    and/or word-level RNNLM, decodes every utterance listed in
    ``args.recog_json`` and writes the results as JSON to
    ``args.result_label``.

    :param args: option namespace; this function reads ``seed``,
        ``model_conf``, ``model``, ``rnnlm``, ``word_rnnlm``, ``word_dict``,
        ``recog_json``, ``input_tensor``, ``beam_size`` and ``result_label``
        from it, logs every attribute, and passes the whole object on to
        ``e2e.recognize``
    :raises SystemExit: (exit status 1) when ``args.word_rnnlm`` is set but
        ``args.word_dict`` is not
    '''
    # seed setting
    torch.manual_seed(args.seed)

    # read training config
    # NOTE(security): pickle.load can execute arbitrary code embedded in the
    # file; only point args.model_conf at config files from a trusted source.
    with open(args.model_conf, "rb") as f:
        logging.info('reading a model config file from %s', args.model_conf)
        idim, odim, train_args = pickle.load(f)

    for key, value in sorted(vars(args).items()):
        logging.info('ARGS: %s: %s', key, value)

    # specify model architecture
    logging.info('reading model parameters from %s', args.model)
    e2e = E2E(idim, odim, train_args)
    model = Loss(e2e, train_args.mtlalpha)

    def cpu_loader(storage, location):
        # map_location callback: hand back the storage unchanged
        return storage

    def remove_dataparallel(state_dict):
        # strip a leading "module." from every key, keeping the key order
        from collections import OrderedDict
        prefix = "module."
        new_state_dict = OrderedDict()
        for k, v in state_dict.items():
            if k.startswith(prefix):
                k = k[len(prefix):]
            new_state_dict[k] = v
        return new_state_dict

    model.load_state_dict(
        remove_dataparallel(torch.load(args.model, map_location=cpu_loader)))

    # read rnnlm
    # NOTE(review): the RNNLM unit count 650 is hard-coded below and is not
    # read from any config in this function; it has to match the checkpoints.
    if args.rnnlm:
        rnnlm = lm_pytorch.ClassifierWithState(
            lm_pytorch.RNNLM(len(train_args.char_list), 650))
        rnnlm.load_state_dict(torch.load(args.rnnlm, map_location=cpu_loader))
        rnnlm.eval()
    else:
        rnnlm = None

    if args.word_rnnlm:
        if not args.word_dict:
            logging.error(
                'word dictionary file is not specified for the word RNNLM.')
            sys.exit(1)

        word_dict = load_labeldict(args.word_dict)
        char_dict = {x: i for i, x in enumerate(train_args.char_list)}
        word_rnnlm = lm_pytorch.ClassifierWithState(
            lm_pytorch.RNNLM(len(word_dict), 650))
        word_rnnlm.load_state_dict(
            torch.load(args.word_rnnlm, map_location=cpu_loader))
        word_rnnlm.eval()

        if rnnlm is not None:
            # both LMs given: combine the word and character predictors
            rnnlm = lm_pytorch.ClassifierWithState(
                extlm_pytorch.MultiLevelLM(word_rnnlm.predictor,
                                           rnnlm.predictor,
                                           word_dict, char_dict))
        else:
            # word LM only
            rnnlm = lm_pytorch.ClassifierWithState(
                extlm_pytorch.LookAheadWordLM(word_rnnlm.predictor,
                                              word_dict, char_dict))

    # read json data
    with open(args.recog_json, 'rb') as f:
        recog_json = json.load(f)['utts']

    # gradients are switched off globally (and not restored) when the
    # module-level flag torch_is_old is false
    if not torch_is_old:
        torch.set_grad_enabled(False)

    char_list = train_args.char_list
    new_json = {}
    for name in recog_json:
        utt = recog_json[name]
        feat_path = utt['input'][0]['feat']
        if args.input_tensor:
            feat = load_lua(feat_path).numpy()
        else:
            feat = kaldi_io_py.read_mat(feat_path)
        nbest_hyps = e2e.recognize(feat, args, char_list, rnnlm=rnnlm)

        # get 1best and remove sos
        y_hat = nbest_hyps[0]['yseq'][1:]
        ref_output = utt['output'][0]
        y_true = [int(tok) for tok in ref_output['tokenid'].split()]

        # print out decoding result
        seq_hat = [char_list[int(idx)] for idx in y_hat]
        seq_true = [char_list[idx] for idx in y_true]
        seq_hat_text = "".join(seq_hat).replace('<space>', ' ')
        seq_true_text = "".join(seq_true).replace('<space>', ' ')
        # the transcripts are passed as logging arguments: concatenating them
        # into the format string breaks the record when they contain '%'
        logging.info("groundtruth[%s]: %s", name, seq_true_text)
        logging.info("prediction [%s]: %s", name, seq_hat_text)

        # copy old json info
        new_json[name] = {'utt2spk': utt['utt2spk']}

        # added recognition results to json
        logging.debug("dump token id")
        out_dic = dict(ref_output)
        # TODO(karita) make consistent to chainer as idx[0] not idx
        out_dic['rec_tokenid'] = " ".join([str(idx) for idx in y_hat])
        logging.debug("dump token")
        out_dic['rec_token'] = " ".join(seq_hat)
        logging.debug("dump text")
        out_dic['rec_text'] = seq_hat_text

        new_json[name]['output'] = [out_dic]
        # TODO(nelson): Modify this part when saving more than 1 hyp is enabled
        # add n-best recognition results with scores
        if args.beam_size > 1 and len(nbest_hyps) > 1:
            for i, hyp in enumerate(nbest_hyps):
                y_hat = hyp['yseq'][1:]
                seq_hat = [char_list[int(idx)] for idx in y_hat]
                seq_hat_text = "".join(seq_hat).replace('<space>', ' ')
                suffix = '[{:05d}]'.format(i)
                new_json[name]['rec_tokenid' + suffix] = \
                    " ".join([str(idx) for idx in y_hat])
                new_json[name]['rec_token' + suffix] = " ".join(seq_hat)
                new_json[name]['rec_text' + suffix] = seq_hat_text
                new_json[name]['score' + suffix] = hyp['score']

    # TODO(watanabe) fix character coding problems when saving it
    with open(args.result_label, 'wb') as f:
        f.write(json.dumps({'utts': new_json}, indent=4,
                           sort_keys=True).encode('utf_8'))
def recog(args):
    '''Run recognition

    Chainer decoding entry point: builds the E2E model described by the
    training config, loads its trained parameters, optionally attaches a
    character-level and/or word-level RNNLM, decodes every utterance listed
    in ``args.recog_json`` and writes the collected results as JSON to
    ``args.result_label``.

    :param args: option namespace; this function reads ``seed``, ``model``,
        ``model_conf``, ``rnnlm``, ``rnnlm_conf``, ``word_rnnlm``,
        ``word_dict``, ``recog_json`` and ``result_label`` from it, logs
        every attribute, and passes the whole object on to ``e2e.recognize``
    :raises SystemExit: (exit status 1) when ``args.word_rnnlm`` is set but
        ``args.word_dict`` is not
    '''
    # display chainer version
    logging.info('chainer version = %s', chainer.__version__)

    # seed setting (chainer seed may not need it)
    os.environ["CHAINER_SEED"] = str(args.seed)
    logging.info('chainer seed = %s', os.environ['CHAINER_SEED'])

    # read training config
    idim, odim, train_args = get_model_conf(args.model, args.model_conf)

    for key, value in sorted(vars(args).items()):
        logging.info('ARGS: %s: %s', key, value)

    # specify model architecture
    logging.info('reading model parameters from %s', args.model)
    e2e = E2E(idim, odim, train_args)
    # the parameters are loaded into the Loss wrapper built around e2e;
    # decoding below calls e2e directly
    model = Loss(e2e, train_args.mtlalpha)
    chainer_load(args.model, model)

    # read rnnlm
    if args.rnnlm:
        rnnlm_args = get_model_conf(args.rnnlm, args.rnnlm_conf)
        rnnlm = lm_chainer.ClassifierWithState(
            lm_chainer.RNNLM(len(train_args.char_list), rnnlm_args.unit))
        chainer_load(args.rnnlm, rnnlm)
    else:
        rnnlm = None

    if args.word_rnnlm:
        if not args.word_dict:
            logging.error(
                'word dictionary file is not specified for the word RNNLM.')
            sys.exit(1)

        # NOTE(review): the word RNNLM config is resolved with
        # args.rnnlm_conf, the same option used for the character RNNLM above
        # -- confirm this is intended.
        rnnlm_args = get_model_conf(args.word_rnnlm, args.rnnlm_conf)
        word_dict = load_labeldict(args.word_dict)
        char_dict = {x: i for i, x in enumerate(train_args.char_list)}
        word_rnnlm = lm_chainer.ClassifierWithState(
            lm_chainer.RNNLM(len(word_dict), rnnlm_args.unit))
        chainer_load(args.word_rnnlm, word_rnnlm)

        if rnnlm is not None:
            # both LMs given: combine the word and character predictors
            rnnlm = lm_chainer.ClassifierWithState(
                extlm_chainer.MultiLevelLM(word_rnnlm.predictor,
                                           rnnlm.predictor,
                                           word_dict, char_dict))
        else:
            # word LM only
            rnnlm = lm_chainer.ClassifierWithState(
                extlm_chainer.LookAheadWordLM(word_rnnlm.predictor,
                                              word_dict, char_dict))

    # read json data
    with open(args.recog_json, 'rb') as f:
        js = json.load(f)['utts']

    # decode each utterance
    new_js = {}
    n_utts = len(js)
    with chainer.no_backprop_mode():
        for idx, name in enumerate(js, 1):
            # pass the utterance name as a logging argument so a '%' in the
            # name cannot corrupt the format string
            logging.info('(%d/%d) decoding %s', idx, n_utts, name)
            feat = kaldi_io_py.read_mat(js[name]['input'][0]['feat'])
            nbest_hyps = e2e.recognize(feat, args, train_args.char_list, rnnlm)
            new_js[name] = add_results_to_json(js[name], nbest_hyps,
                                               train_args.char_list)

    # TODO(watanabe) fix character coding problems when saving it
    with open(args.result_label, 'wb') as f:
        f.write(json.dumps({'utts': new_js}, indent=4,
                           sort_keys=True).encode('utf_8'))
def recog(args):
    '''Run recognition

    Legacy Chainer decoding entry point: reads the pickled training config,
    loads the E2E model from an npz file, optionally attaches a
    character-level and/or word-level RNNLM, decodes every utterance listed
    in ``args.recog_json`` and writes the results as JSON to
    ``args.result_label``.

    :param args: option namespace; this function reads ``seed``,
        ``model_conf``, ``model``, ``rnnlm``, ``word_rnnlm``, ``word_dict``,
        ``recog_json``, ``beam_size`` and ``result_label`` from it, logs
        every attribute, and passes the whole object on to ``e2e.recognize``
    :raises SystemExit: (exit status 1) when ``args.word_rnnlm`` is set but
        ``args.word_dict`` is not
    '''
    # display chainer version
    logging.info('chainer version = %s', chainer.__version__)

    # seed setting (chainer seed may not need it)
    os.environ["CHAINER_SEED"] = str(args.seed)
    logging.info('chainer seed = %s', os.environ['CHAINER_SEED'])

    # read training config
    # NOTE(security): pickle.load can execute arbitrary code embedded in the
    # file; only point args.model_conf at config files from a trusted source.
    with open(args.model_conf, "rb") as f:
        logging.info('reading a model config file from %s', args.model_conf)
        idim, odim, train_args = pickle.load(f)

    for key, value in sorted(vars(args).items()):
        logging.info('ARGS: %s: %s', key, value)

    # specify model architecture
    logging.info('reading model parameters from %s', args.model)
    e2e = E2E(idim, odim, train_args)
    model = Loss(e2e, train_args.mtlalpha)
    chainer.serializers.load_npz(args.model, model)

    # read rnnlm
    # NOTE(review): the RNNLM unit count 650 is hard-coded below and is not
    # read from any config in this function; it has to match the checkpoints.
    if args.rnnlm:
        rnnlm = lm_chainer.ClassifierWithState(
            lm_chainer.RNNLM(len(train_args.char_list), 650))
        chainer.serializers.load_npz(args.rnnlm, rnnlm)
    else:
        rnnlm = None

    if args.word_rnnlm:
        if not args.word_dict:
            logging.error(
                'word dictionary file is not specified for the word RNNLM.')
            sys.exit(1)

        word_dict = load_labeldict(args.word_dict)
        char_dict = {x: i for i, x in enumerate(train_args.char_list)}
        word_rnnlm = lm_chainer.ClassifierWithState(
            lm_chainer.RNNLM(len(word_dict), 650))
        chainer.serializers.load_npz(args.word_rnnlm, word_rnnlm)

        if rnnlm is not None:
            # both LMs given: combine the word and character predictors
            rnnlm = lm_chainer.ClassifierWithState(
                extlm_chainer.MultiLevelLM(word_rnnlm.predictor,
                                           rnnlm.predictor,
                                           word_dict, char_dict))
        else:
            # word LM only
            rnnlm = lm_chainer.ClassifierWithState(
                extlm_chainer.LookAheadWordLM(word_rnnlm.predictor,
                                              word_dict, char_dict))

    # read json data
    with open(args.recog_json, 'rb') as f:
        recog_json = json.load(f)['utts']

    char_list = train_args.char_list
    new_json = {}
    for name in recog_json:
        utt = recog_json[name]
        feat = kaldi_io_py.read_mat(utt['input'][0]['feat'])
        logging.info('decoding %s', name)
        nbest_hyps = e2e.recognize(feat, args, char_list, rnnlm)

        # get 1best and remove sos
        y_hat = nbest_hyps[0]['yseq'][1:]
        ref_output = utt['output'][0]
        y_true = [int(tok) for tok in ref_output['tokenid'].split()]

        # print out decoding result
        seq_hat = [char_list[int(idx)] for idx in y_hat]
        seq_true = [char_list[idx] for idx in y_true]
        seq_hat_text = "".join(seq_hat).replace('<space>', ' ')
        seq_true_text = "".join(seq_true).replace('<space>', ' ')
        # the transcripts are passed as logging arguments: concatenating them
        # into the format string breaks the record when they contain '%'
        logging.info("groundtruth[%s]: %s", name, seq_true_text)
        logging.info("prediction [%s]: %s", name, seq_hat_text)

        # copy old json info
        new_json[name] = {'utt2spk': utt['utt2spk']}

        # add 1-best recognition results to json
        logging.debug("dump token id")
        out_dic = dict(ref_output)
        # TODO(karita) make consistent to chainer as idx[0] not idx
        # each element of y_hat is indexed with [0] before being stringified
        out_dic['rec_tokenid'] = " ".join([str(idx[0]) for idx in y_hat])
        logging.debug("dump token")
        out_dic['rec_token'] = " ".join(seq_hat)
        logging.debug("dump text")
        out_dic['rec_text'] = seq_hat_text

        new_json[name]['output'] = [out_dic]
        # TODO(nelson): Modify this part when saving more than 1 hyp is enabled
        # add n-best recognition results with scores
        if args.beam_size > 1 and len(nbest_hyps) > 1:
            for i, hyp in enumerate(nbest_hyps):
                y_hat = hyp['yseq'][1:]
                seq_hat = [char_list[int(idx)] for idx in y_hat]
                seq_hat_text = "".join(seq_hat).replace('<space>', ' ')
                suffix = '[{:05d}]'.format(i)
                new_json[name]['rec_tokenid' + suffix] = \
                    " ".join([str(idx[0]) for idx in y_hat])
                new_json[name]['rec_token' + suffix] = " ".join(seq_hat)
                new_json[name]['rec_text' + suffix] = seq_hat_text
                new_json[name]['score' + suffix] = hyp['score']

    # TODO(watanabe) fix character coding problems when saving it
    with open(args.result_label, 'wb') as f:
        f.write(json.dumps({'utts': new_json}, indent=4,
                           sort_keys=True).encode('utf_8'))
def recog(args):
    '''Run recognition

    PyTorch decoding entry point: builds the E2E model described by the
    training config, loads its trained parameters, optionally attaches a
    character-level and/or word-level RNNLM, decodes every utterance listed
    in ``args.recog_json`` and writes the 1-best (and, for beam search with
    several hypotheses, n-best) results as JSON to ``args.result_label``.

    :param args: option namespace; this function reads ``seed``, ``model``,
        ``model_conf``, ``rnnlm``, ``rnnlm_conf``, ``word_rnnlm``,
        ``word_dict``, ``recog_json``, ``beam_size`` and ``result_label``
        from it and passes the whole object on to ``e2e.recognize``
    :raises SystemExit: (exit status 1) when ``args.word_rnnlm`` is set but
        ``args.word_dict`` is not
    '''
    # seed setting
    torch.manual_seed(args.seed)

    # read training config
    idim, odim, train_args = get_model_conf(args.model, args.model_conf)

    # load trained model parameters
    logging.info('reading model parameters from %s', args.model)
    e2e = E2E(idim, odim, train_args)
    # the parameters are loaded into the Loss wrapper built around e2e;
    # decoding below calls e2e directly
    model = Loss(e2e, train_args.mtlalpha)
    torch_load(args.model, model)

    # read rnnlm
    if args.rnnlm:
        rnnlm_args = get_model_conf(args.rnnlm, args.rnnlm_conf)
        rnnlm = lm_pytorch.ClassifierWithState(
            lm_pytorch.RNNLM(len(train_args.char_list), rnnlm_args.unit))
        torch_load(args.rnnlm, rnnlm)
        rnnlm.eval()
    else:
        rnnlm = None

    if args.word_rnnlm:
        if not args.word_dict:
            logging.error('word dictionary file is not specified for the word RNNLM.')
            sys.exit(1)

        # NOTE(review): the word RNNLM config is resolved with
        # args.rnnlm_conf, the same option used for the character RNNLM above
        # -- confirm this is intended.
        rnnlm_args = get_model_conf(args.word_rnnlm, args.rnnlm_conf)
        word_dict = load_labeldict(args.word_dict)
        char_dict = {x: i for i, x in enumerate(train_args.char_list)}
        word_rnnlm = lm_pytorch.ClassifierWithState(
            lm_pytorch.RNNLM(len(word_dict), rnnlm_args.unit))
        torch_load(args.word_rnnlm, word_rnnlm)
        word_rnnlm.eval()

        if rnnlm is not None:
            # both LMs given: combine the word and character predictors
            rnnlm = lm_pytorch.ClassifierWithState(
                extlm_pytorch.MultiLevelLM(word_rnnlm.predictor,
                                           rnnlm.predictor,
                                           word_dict, char_dict))
        else:
            # word LM only
            rnnlm = lm_pytorch.ClassifierWithState(
                extlm_pytorch.LookAheadWordLM(word_rnnlm.predictor,
                                              word_dict, char_dict))

    # read json data
    with open(args.recog_json, 'rb') as f:
        recog_json = json.load(f)['utts']

    char_list = train_args.char_list
    new_json = {}
    with torch.no_grad():
        for name in recog_json:
            utt = recog_json[name]
            feat = kaldi_io_py.read_mat(utt['input'][0]['feat'])
            nbest_hyps = e2e.recognize(feat, args, char_list, rnnlm=rnnlm)

            # get 1best and remove sos
            y_hat = nbest_hyps[0]['yseq'][1:]
            ref_output = utt['output'][0]
            y_true = [int(tok) for tok in ref_output['tokenid'].split()]

            # print out decoding result
            seq_hat = [char_list[int(idx)] for idx in y_hat]
            seq_true = [char_list[idx] for idx in y_true]
            seq_hat_text = "".join(seq_hat).replace('<space>', ' ')
            seq_true_text = "".join(seq_true).replace('<space>', ' ')
            # the transcripts are passed as logging arguments: concatenating
            # them into the format string breaks the record when they
            # contain '%'
            logging.info("groundtruth[%s]: %s", name, seq_true_text)
            logging.info("prediction [%s]: %s", name, seq_hat_text)

            # copy old json info
            new_json[name] = {'utt2spk': utt['utt2spk']}

            # added recognition results to json
            logging.debug("dump token id")
            out_dic = dict(ref_output)
            # TODO(karita) make consistent to chainer as idx[0] not idx
            out_dic['rec_tokenid'] = " ".join([str(idx) for idx in y_hat])
            logging.debug("dump token")
            out_dic['rec_token'] = " ".join(seq_hat)
            logging.debug("dump text")
            out_dic['rec_text'] = seq_hat_text

            new_json[name]['output'] = [out_dic]
            # TODO(nelson): Modify this part when saving more than 1 hyp is enabled
            # add n-best recognition results with scores
            if args.beam_size > 1 and len(nbest_hyps) > 1:
                for i, hyp in enumerate(nbest_hyps):
                    y_hat = hyp['yseq'][1:]
                    seq_hat = [char_list[int(idx)] for idx in y_hat]
                    seq_hat_text = "".join(seq_hat).replace('<space>', ' ')
                    suffix = '[{:05d}]'.format(i)
                    new_json[name]['rec_tokenid' + suffix] = \
                        " ".join([str(idx) for idx in y_hat])
                    new_json[name]['rec_token' + suffix] = " ".join(seq_hat)
                    new_json[name]['rec_text' + suffix] = seq_hat_text
                    new_json[name]['score' + suffix] = hyp['score']

    # TODO(watanabe) fix character coding problems when saving it
    with open(args.result_label, 'wb') as f:
        f.write(json.dumps({'utts': new_json}, indent=4,
                           sort_keys=True).encode('utf_8'))