def main():
    parser = argparse.ArgumentParser()
    # general configuration
    parser.add_argument('--gpu', '-g', default='-1', type=str,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--debugmode', default=1, type=int,
                        help='Debugmode')
    parser.add_argument('--seed', default=1, type=int,
                        help='Random seed')
    parser.add_argument('--verbose', '-V', default=1, type=int,
                        help='Verbose option')
    # task related
    parser.add_argument('--recog-feat', type=str, required=True,
                        help='Filename of recognition feature data (Kaldi scp)')
    parser.add_argument('--recog-label', type=str, required=True,
                        help='Filename of recognition label data (json)')
    parser.add_argument('--result-label', type=str, required=True,
                        help='Filename of result label data (json)')
    # model (parameter) related
    parser.add_argument('--model', type=str, required=True,
                        help='Model file parameters to read')
    parser.add_argument('--model-conf', type=str, required=True,
                        help='Model config file')
    # search related
    parser.add_argument('--beam-size', type=int, default=1,
                        help='Beam size')
    parser.add_argument('--penalty', default=0.0, type=float,
                        help='Insertion penalty')
    parser.add_argument('--maxlenratio', default=0.5, type=float,
                        help='Input length ratio to obtain max output length')
    parser.add_argument('--minlenratio', default=0.0, type=float,
                        help='Input length ratio to obtain min output length')
    args = parser.parse_args()

    # logging info
    if args.verbose == 1:
        logging.basicConfig(
            level=logging.INFO,
            format="%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s")
    elif args.verbose == 2:
        logging.basicConfig(
            level=logging.DEBUG,
            format="%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s")
    else:
        logging.basicConfig(
            level=logging.WARN,
            format="%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s")
        logging.warning("Skip DEBUG/INFO messages")

    # display PYTHONPATH
    logging.info('python path = ' + os.environ['PYTHONPATH'])

    # display chainer version
    logging.info('chainer version = ' + chainer.__version__)

    # seed setting (chainer seed may not need it)
    nseed = args.seed
    random.seed(nseed)
    np.random.seed(nseed)
    os.environ["CHAINER_SEED"] = str(nseed)
    logging.info('chainer seed = ' + os.environ['CHAINER_SEED'])

    # read training config
    with open(args.model_conf, "rb") as f:
        logging.info('reading a model config file from ' + args.model_conf)
        idim, odim, train_args = pickle.load(f)

    for key in sorted(vars(args).keys()):
        logging.info('ARGS: ' + key + ': ' + str(vars(args)[key]))

    # specify model architecture
    logging.info('reading model parameters from ' + args.model)
    e2e = E2E(idim, odim, train_args)
    model = Loss(e2e, train_args.mtlalpha)
    # chainer.serializers.load_npz(args.model, model)

    def cpu_loader(storage, location):
        return storage
    model.load_state_dict(torch.load(args.model, map_location=cpu_loader))

    # prepare Kaldi reader
    reader = kaldi_io_py.read_mat_ark(args.recog_feat)

    # read json data
    with open(args.recog_label, 'rb') as f:
        recog_json = json.load(f)['utts']

    new_json = {}
    for name, feat in reader:
        y_hat = e2e.recognize(feat, args, train_args.char_list)
        y_true = map(int, recog_json[name]['tokenid'].split())

        # print out decoding result
        seq_hat = [train_args.char_list[int(idx)] for idx in y_hat]
        seq_true = [train_args.char_list[int(idx)] for idx in y_true]
        seq_hat_text = "".join(seq_hat).replace('<space>', ' ')
        seq_true_text = "".join(seq_true).replace('<space>', ' ')
        logging.info("groundtruth[%s]: " + seq_true_text, name)
        logging.info("prediction [%s]: " + seq_hat_text, name)

        # copy old json info
        new_json[name] = recog_json[name]

        # add recognition results to json
        logging.debug("dump token id")
        # TODO(karita) make consistent to chainer as idx[0] not idx
        new_json[name]['rec_tokenid'] = " ".join([str(idx) for idx in y_hat])
        logging.debug("dump token")
        new_json[name]['rec_token'] = " ".join(seq_hat)
        logging.debug("dump text")
        new_json[name]['rec_text'] = seq_hat_text

    # TODO fix character coding problems when saving it
    with open(args.result_label, 'wb') as f:
        f.write(json.dumps({'utts': new_json}, indent=4).encode('utf_8'))
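# Example invocation (a hedged sketch only; the script name and all file paths
# below are hypothetical placeholders chosen to match the argparse flags above,
# not files shipped with this code):
#
#   python asr_recog.py \
#       --gpu -1 \
#       --recog-feat dump/test/feats.scp \
#       --recog-label dump/test/data.json \
#       --result-label exp/decode/result.json \
#       --model exp/train/model.acc.best \
#       --model-conf exp/train/model.conf \
#       --beam-size 20 --penalty 0.0 --maxlenratio 0.5 --minlenratio 0.0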
def recog(args):
    '''Run recognition'''
    # seed setting
    torch.manual_seed(args.seed)

    # read training config
    with open(args.model_conf, "rb") as f:
        logging.info('reading a model config file from ' + args.model_conf)
        idim, odim, train_args = pickle.load(f)

    for key in sorted(vars(args).keys()):
        logging.info('ARGS: ' + key + ': ' + str(vars(args)[key]))

    # specify model architecture
    logging.info('reading model parameters from ' + args.model)
    e2e = E2E(idim, odim, train_args)
    model = Loss(e2e, train_args.mtlalpha)

    def cpu_loader(storage, location):
        return storage

    def remove_dataparallel(state_dict):
        from collections import OrderedDict
        new_state_dict = OrderedDict()
        for k, v in state_dict.items():
            if k.startswith("module."):
                # strip the "module." prefix added by torch.nn.DataParallel
                name = k[7:]
                new_state_dict[name] = v
            else:
                # keep parameters that were saved without DataParallel wrapping
                new_state_dict[k] = v
        return new_state_dict

    model.load_state_dict(remove_dataparallel(
        torch.load(args.model, map_location=cpu_loader)))

    # read rnnlm
    if args.rnnlm:
        rnnlm = lm_pytorch.ClassifierWithState(
            lm_pytorch.RNNLM(len(train_args.char_list), 650))
        rnnlm.load_state_dict(torch.load(args.rnnlm, map_location=cpu_loader))
    else:
        rnnlm = None

    # prepare Kaldi reader
    reader = kaldi_io_py.read_mat_ark(args.recog_feat)

    # read json data
    with open(args.recog_label, 'rb') as f:
        recog_json = json.load(f)['utts']

    new_json = {}
    for name, feat in reader:
        nbest_hyps = e2e.recognize(feat, args, train_args.char_list, rnnlm=rnnlm)
        # get 1best and remove sos
        y_hat = nbest_hyps[0]['yseq'][1:]
        y_true = map(int, recog_json[name]['tokenid'].split())

        # print out decoding result
        seq_hat = [train_args.char_list[int(idx)] for idx in y_hat]
        seq_true = [train_args.char_list[int(idx)] for idx in y_true]
        seq_hat_text = "".join(seq_hat).replace('<space>', ' ')
        seq_true_text = "".join(seq_true).replace('<space>', ' ')
        logging.info("groundtruth[%s]: " + seq_true_text, name)
        logging.info("prediction [%s]: " + seq_hat_text, name)

        # copy old json info
        new_json[name] = recog_json[name]

        # add recognition results to json
        logging.debug("dump token id")
        # TODO(karita) make consistent to chainer as idx[0] not idx
        new_json[name]['rec_tokenid'] = " ".join([str(idx) for idx in y_hat])
        logging.debug("dump token")
        new_json[name]['rec_token'] = " ".join(seq_hat)
        logging.debug("dump text")
        new_json[name]['rec_text'] = seq_hat_text

        # add n-best recognition results with scores
        if args.beam_size > 1 and len(nbest_hyps) > 1:
            for i, hyp in enumerate(nbest_hyps):
                y_hat = hyp['yseq'][1:]
                seq_hat = [train_args.char_list[int(idx)] for idx in y_hat]
                seq_hat_text = "".join(seq_hat).replace('<space>', ' ')
                new_json[name]['rec_tokenid' + '[' + '{:05d}'.format(i) + ']'] = \
                    " ".join([str(idx) for idx in y_hat])
                new_json[name]['rec_token' + '[' + '{:05d}'.format(i) + ']'] = " ".join(seq_hat)
                new_json[name]['rec_text' + '[' + '{:05d}'.format(i) + ']'] = seq_hat_text
                new_json[name]['score' + '[' + '{:05d}'.format(i) + ']'] = hyp['score']

    # TODO(watanabe) fix character coding problems when saving it
    with open(args.result_label, 'wb') as f:
        f.write(json.dumps({'utts': new_json},
                           indent=4, sort_keys=True).encode('utf_8'))
def recog(args):
    '''Run recognition'''
    # display chainer version
    logging.info('chainer version = ' + chainer.__version__)

    # seed setting (chainer seed may not need it)
    os.environ["CHAINER_SEED"] = str(args.seed)
    logging.info('chainer seed = ' + os.environ['CHAINER_SEED'])

    # read training config
    with open(args.model_conf, "rb") as f:
        logging.info('reading a model config file from ' + args.model_conf)
        idim, odim, train_args = pickle.load(f)

    for key in sorted(vars(args).keys()):
        logging.info('ARGS: ' + key + ': ' + str(vars(args)[key]))

    # specify model architecture
    logging.info('reading model parameters from ' + args.model)
    e2e = E2E(idim, odim, train_args)
    model = Loss(e2e, train_args.mtlalpha)
    chainer.serializers.load_npz(args.model, model)

    # read rnnlm
    if args.rnnlm:
        rnnlm = lm_chainer.ClassifierWithState(
            lm_chainer.RNNLM(len(train_args.char_list), 650))
        chainer.serializers.load_npz(args.rnnlm, rnnlm)
    else:
        rnnlm = None

    # prepare Kaldi reader
    reader = kaldi_io_py.read_mat_ark(args.recog_feat)

    # read json data
    with open(args.recog_label, 'rb') as f:
        recog_json = json.load(f)['utts']

    new_json = {}
    for name, feat in reader:
        logging.info('decoding ' + name)
        if args.beam_size == 1:
            y_hat = e2e.recognize(feat, args, train_args.char_list, rnnlm)
        else:
            nbest_hyps = e2e.recognize(feat, args, train_args.char_list, rnnlm)
            # get 1best and remove sos
            y_hat = nbest_hyps[0]['yseq'][1:]
        y_true = map(int, recog_json[name]['tokenid'].split())

        # print out decoding result
        seq_hat = [train_args.char_list[int(idx)] for idx in y_hat]
        seq_true = [train_args.char_list[int(idx)] for idx in y_true]
        seq_hat_text = "".join(seq_hat).replace('<space>', ' ')
        seq_true_text = "".join(seq_true).replace('<space>', ' ')
        logging.info("groundtruth[%s]: " + seq_true_text, name)
        logging.info("prediction [%s]: " + seq_hat_text, name)

        # copy old json info
        new_json[name] = recog_json[name]

        # add 1-best recognition results to json
        new_json[name]['rec_tokenid'] = " ".join(
            [str(idx[0]) for idx in y_hat])
        new_json[name]['rec_token'] = " ".join(seq_hat)
        new_json[name]['rec_text'] = seq_hat_text

        # add n-best recognition results with scores
        if args.beam_size > 1 and len(nbest_hyps) > 1:
            for i, hyp in enumerate(nbest_hyps):
                y_hat = hyp['yseq'][1:]
                seq_hat = [train_args.char_list[int(idx)] for idx in y_hat]
                seq_hat_text = "".join(seq_hat).replace('<space>', ' ')
                new_json[name]['rec_tokenid' + '[' + '{:05d}'.format(i) + ']'] \
                    = " ".join([str(idx[0]) for idx in y_hat])
                new_json[name]['rec_token' + '[' + '{:05d}'.format(i) + ']'] = " ".join(seq_hat)
                new_json[name]['rec_text' + '[' + '{:05d}'.format(i) + ']'] = seq_hat_text
                new_json[name]['score' + '[' + '{:05d}'.format(i) + ']'] = hyp['score']

    # TODO(watanabe) fix character coding problems when saving it
    with open(args.result_label, 'wb') as f:
        f.write(json.dumps({'utts': new_json},
                           indent=4, sort_keys=True).encode('utf_8'))
def recog(args):
    '''Run recognition'''
    # seed setting
    torch.manual_seed(args.seed)

    # read training config
    with open(args.model_conf, "rb") as f:
        logging.info('reading a model config file from ' + args.model_conf)
        idim, odim, train_args = pickle.load(f)

    for key in sorted(vars(args).keys()):
        logging.info('ARGS: ' + key + ': ' + str(vars(args)[key]))

    # specify model architecture
    logging.info('reading model parameters from ' + args.model)
    e2e = E2E(idim, odim, train_args)
    model = Loss(e2e, train_args.mtlalpha)

    def cpu_loader(storage, location):
        return storage
    model.load_state_dict(torch.load(args.model, map_location=cpu_loader))

    # read rnnlm
    if args.rnnlm:
        logging.warning("rnnlm integration is not implemented in the pytorch backend")

    # prepare Kaldi reader
    reader = kaldi_io_py.read_mat_ark(args.recog_feat)

    # read json data
    with open(args.recog_label, 'rb') as f:
        recog_json = json.load(f)['utts']

    new_json = {}
    for name, feat in reader:
        y_hat = e2e.recognize(feat, args, train_args.char_list)
        y_true = map(int, recog_json[name]['tokenid'].split())

        # print out decoding result
        seq_hat = [train_args.char_list[int(idx)] for idx in y_hat]
        seq_true = [train_args.char_list[int(idx)] for idx in y_true]
        seq_hat_text = "".join(seq_hat).replace('<space>', ' ')
        seq_true_text = "".join(seq_true).replace('<space>', ' ')
        logging.info("groundtruth[%s]: " + seq_true_text, name)
        logging.info("prediction [%s]: " + seq_hat_text, name)

        # copy old json info
        new_json[name] = recog_json[name]

        # add recognition results to json
        logging.debug("dump token id")
        # TODO(karita) make consistent to chainer as idx[0] not idx
        new_json[name]['rec_tokenid'] = " ".join([str(idx) for idx in y_hat])
        logging.debug("dump token")
        new_json[name]['rec_token'] = " ".join(seq_hat)
        logging.debug("dump text")
        new_json[name]['rec_text'] = seq_hat_text

    # TODO(watanabe) fix character coding problems when saving it
    with open(args.result_label, 'wb') as f:
        f.write(json.dumps({'utts': new_json}, indent=4).encode('utf_8'))