def recog(args):
    '''Run recognition

    Rebuilds the E2E model from the pickled training config, loads its
    parameters, optionally attaches a character-level and/or word-level
    RNN language model, decodes every utterance found in
    ``args.recog_json`` and dumps the hypotheses to ``args.result_label``.

    :param Namespace args: program arguments. Attributes read here:
        seed, model_conf, model, rnnlm, word_rnnlm, word_dict, recog_json,
        input_tensor, beam_size, result_label. The whole namespace is also
        forwarded to ``e2e.recognize``.
    '''
    # seed setting
    torch.manual_seed(args.seed)

    # read training config
    # NOTE: pickle.load can execute arbitrary code stored in the file;
    # only point --model-conf at files from a trusted source.
    with open(args.model_conf, "rb") as f:
        logging.info('reading a model config file from' + args.model_conf)
        idim, odim, train_args = pickle.load(f)

    for key in sorted(vars(args).keys()):
        logging.info('ARGS: ' + key + ': ' + str(vars(args)[key]))

    # specify model architecture
    logging.info('reading model parameters from' + args.model)
    e2e = E2E(idim, odim, train_args)
    model = Loss(e2e, train_args.mtlalpha)

    def cpu_loader(storage, location):
        # keep every tensor on the CPU regardless of where it was saved
        return storage

    def remove_dataparallel(state_dict):
        # strip the "module." prefix that DataParallel adds to parameter names
        from collections import OrderedDict
        new_state_dict = OrderedDict()
        for k, v in state_dict.items():
            if k.startswith("module."):
                k = k[7:]
            new_state_dict[k] = v
        return new_state_dict

    model.load_state_dict(
        remove_dataparallel(torch.load(args.model, map_location=cpu_loader)))

    # read rnnlm
    if args.rnnlm:
        rnnlm = lm_pytorch.ClassifierWithState(
            lm_pytorch.RNNLM(len(train_args.char_list), 650))
        rnnlm.load_state_dict(torch.load(args.rnnlm, map_location=cpu_loader))
        rnnlm.eval()
    else:
        rnnlm = None

    if args.word_rnnlm:
        if not args.word_dict:
            logging.error(
                'word dictionary file is not specified for the word RNNLM.')
            sys.exit(1)

        word_dict = load_labeldict(args.word_dict)
        char_dict = {x: i for i, x in enumerate(train_args.char_list)}
        word_rnnlm = lm_pytorch.ClassifierWithState(
            lm_pytorch.RNNLM(len(word_dict), 650))
        word_rnnlm.load_state_dict(
            torch.load(args.word_rnnlm, map_location=cpu_loader))
        word_rnnlm.eval()

        # combine with the character LM when both are given,
        # otherwise use the word LM alone through look-ahead
        if rnnlm is not None:
            rnnlm = lm_pytorch.ClassifierWithState(
                extlm_pytorch.MultiLevelLM(word_rnnlm.predictor,
                                           rnnlm.predictor,
                                           word_dict, char_dict))
        else:
            rnnlm = lm_pytorch.ClassifierWithState(
                extlm_pytorch.LookAheadWordLM(word_rnnlm.predictor,
                                              word_dict, char_dict))

    # read json data
    with open(args.recog_json, 'rb') as f:
        recog_json = json.load(f)['utts']

    if not torch_is_old:
        torch.set_grad_enabled(False)

    new_json = {}
    for name in recog_json.keys():
        if args.input_tensor:
            feat = load_lua(recog_json[name]['input'][0]['feat']).numpy()
        else:
            feat = kaldi_io_py.read_mat(recog_json[name]['input'][0]['feat'])
        nbest_hyps = e2e.recognize(feat, args, train_args.char_list, rnnlm=rnnlm)

        # get 1best and remove sos
        y_hat = nbest_hyps[0]['yseq'][1:]
        y_true = [int(idx) for idx in
                  recog_json[name]['output'][0]['tokenid'].split()]

        # print out decoding result
        seq_hat = [train_args.char_list[int(idx)] for idx in y_hat]
        seq_true = [train_args.char_list[int(idx)] for idx in y_true]
        seq_hat_text = "".join(seq_hat).replace('<space>', ' ')
        seq_true_text = "".join(seq_true).replace('<space>', ' ')
        # pass the text as a logging argument: a literal '%' inside a
        # transcript must not be interpreted as a format directive
        logging.info("groundtruth[%s]: %s", name, seq_true_text)
        logging.info("prediction [%s]: %s", name, seq_hat_text)

        # copy old json info
        new_json[name] = dict()
        new_json[name]['utt2spk'] = recog_json[name]['utt2spk']

        # added recognition results to json
        logging.debug("dump token id")
        out_dic = dict(recog_json[name]['output'][0])

        # TODO(karita) make consistent to chainer as idx[0] not idx
        out_dic['rec_tokenid'] = " ".join([str(idx) for idx in y_hat])
        logging.debug("dump token")
        out_dic['rec_token'] = " ".join(seq_hat)
        logging.debug("dump text")
        out_dic['rec_text'] = seq_hat_text

        new_json[name]['output'] = [out_dic]
        # TODO(nelson): Modify this part when saving more than 1 hyp is enabled
        # add n-best recognition results with scores
        if args.beam_size > 1 and len(nbest_hyps) > 1:
            for i, hyp in enumerate(nbest_hyps):
                y_hat = hyp['yseq'][1:]
                seq_hat = [train_args.char_list[int(idx)] for idx in y_hat]
                seq_hat_text = "".join(seq_hat).replace('<space>', ' ')
                suffix = '[' + '{:05d}'.format(i) + ']'
                new_json[name]['rec_tokenid' + suffix] = \
                    " ".join([str(idx) for idx in y_hat])
                new_json[name]['rec_token' + suffix] = " ".join(seq_hat)
                new_json[name]['rec_text' + suffix] = seq_hat_text
                new_json[name]['score' + suffix] = hyp['score']

    # TODO(watanabe) fix character coding problems when saving it
    with open(args.result_label, 'wb') as f:
        f.write(
            json.dumps({
                'utts': new_json
            }, indent=4, sort_keys=True).encode('utf_8'))
def train(args):
    '''Run training

    Builds the E2E model and its loss wrapper, stores the model config in
    ``args.outdir``, prepares the minibatch iterators and a chainer
    ``Trainer`` with evaluation, plotting, snapshot and eps-decay
    extensions, and finally runs the training loop.

    :param Namespace args: program arguments. ``args.batch_size`` is
        multiplied by ``args.ngpu`` in place when more than one GPU is used.
    :raises ValueError: if ``args.opt`` is neither 'adadelta' nor 'adam'
    '''
    # seed setting
    torch.manual_seed(args.seed)

    # debug mode setting
    # 0 would be fastest, but 1 seems to be reasonable
    # by considering reproducibility
    # remove type check
    if args.debugmode < 2:
        chainer.config.type_check = False
        logging.info('torch type check is disabled')
    # use deterministic computation or not
    if args.debugmode < 1:
        torch.backends.cudnn.deterministic = False
        logging.info('torch cudnn deterministic is disabled')
    else:
        torch.backends.cudnn.deterministic = True

    # check cuda availability
    if not torch.cuda.is_available():
        logging.warning('cuda is not available')

    # get input and output dimension info
    with open(args.valid_json, 'rb') as f:
        valid_json = json.load(f)['utts']
    utts = list(valid_json.keys())

    # TODO(nelson) remove in future
    if 'input' not in valid_json[utts[0]]:
        logging.error("input file format (json) is modified, please redo"
                      "stage 2: Dictionary and Json Data Preparation")
        sys.exit(1)

    idim = int(valid_json[utts[0]]['input'][0]['shape'][1])
    odim = int(valid_json[utts[0]]['output'][0]['shape'][1])
    logging.info('#input dims : ' + str(idim))
    logging.info('#output dims: ' + str(odim))

    # specify attention, CTC, hybrid mode
    if args.mtlalpha == 1.0:
        mtl_mode = 'ctc'
        logging.info('Pure CTC mode')
    elif args.mtlalpha == 0.0:
        mtl_mode = 'att'
        logging.info('Pure attention mode')
    else:
        mtl_mode = 'mtl'
        logging.info('Multitask learning mode')

    # specify model architecture
    e2e = E2E(idim, odim, args)
    model = Loss(e2e, args.mtlalpha)

    # write model config
    if not os.path.exists(args.outdir):
        os.makedirs(args.outdir)
    model_conf = args.outdir + '/model.conf'
    with open(model_conf, 'wb') as f:
        logging.info('writing a model config file to' + model_conf)
        # TODO(watanabe) use others than pickle, possibly json, and save as a text
        pickle.dump((idim, odim, args), f)
    for key in sorted(vars(args).keys()):
        logging.info('ARGS: ' + key + ': ' + str(vars(args)[key]))

    # Set gpu
    reporter = model.reporter
    ngpu = args.ngpu
    if ngpu == 1:
        gpu_id = range(ngpu)
        logging.info('gpu id: ' + str(gpu_id))
        model.cuda()
    elif ngpu > 1:
        gpu_id = range(ngpu)
        logging.info('gpu id: ' + str(gpu_id))
        model = DataParallel(model, device_ids=gpu_id)
        model.cuda()
        logging.info('batch size is automatically increased (%d -> %d)' % (
            args.batch_size, args.batch_size * args.ngpu))
        args.batch_size *= args.ngpu
    else:
        gpu_id = [-1]

    # Setup an optimizer
    if args.opt == 'adadelta':
        optimizer = torch.optim.Adadelta(
            model.parameters(), rho=0.95, eps=args.eps)
    elif args.opt == 'adam':
        optimizer = torch.optim.Adam(model.parameters())
    else:
        # fail here with a clear message instead of an unbound-variable
        # error a few lines below
        raise ValueError('unsupported optimizer: ' + str(args.opt))

    # FIXME: TOO DIRTY HACK
    setattr(optimizer, "target", reporter)
    setattr(optimizer, "serialize", lambda s: reporter.serialize(s))

    # read json data (valid_json has already been loaded above)
    with open(args.train_json, 'rb') as f:
        train_json = json.load(f)['utts']

    # make minibatch list (variable length)
    train = make_batchset(train_json, args.batch_size,
                          args.maxlen_in, args.maxlen_out, args.minibatches)
    valid = make_batchset(valid_json, args.batch_size,
                          args.maxlen_in, args.maxlen_out, args.minibatches)
    # hack to make batchsize argument as 1
    # actual batchsize is included in a list
    train_iter = chainer.iterators.SerialIterator(train, 1)
    valid_iter = chainer.iterators.SerialIterator(
        valid, 1, repeat=False, shuffle=False)

    # converter choice
    converter = converter_kaldi
    if args.input_tensor:
        converter = converter_tensor

    # Set up a trainer
    updater = PytorchSeqUpdaterKaldi(
        model, args.grad_clip, train_iter, optimizer,
        converter=converter, device=gpu_id)
    trainer = training.Trainer(
        updater, (args.epochs, 'epoch'), out=args.outdir)

    # Resume from a snapshot
    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)
        if ngpu > 1:
            model.module.load_state_dict(
                torch.load(args.outdir + '/model.acc.best'))
        else:
            model.load_state_dict(torch.load(args.outdir + '/model.acc.best'))
        model = trainer.updater.model

    # Evaluate the model with the test dataset for each epoch
    trainer.extend(
        PytorchSeqEvaluaterKaldi(model, valid_iter, reporter,
                                 converter=converter, device=gpu_id))

    # Save attention weight each epoch
    if args.num_save_attention > 0 and args.mtlalpha != 1.0:
        data = sorted(list(valid_json.items())[:args.num_save_attention],
                      key=lambda x: int(x[1]['input'][0]['shape'][1]),
                      reverse=True)
        data = converter([data], device=gpu_id)
        trainer.extend(PlotAttentionReport(model, data, args.outdir + "/att_ws"),
                       trigger=(1, 'epoch'))

    # Make a plot for training and validation values
    trainer.extend(
        extensions.PlotReport([
            'main/loss', 'validation/main/loss', 'main/loss_ctc',
            'validation/main/loss_ctc', 'main/loss_att',
            'validation/main/loss_att'
        ], 'epoch', file_name='loss.png'))
    trainer.extend(
        extensions.PlotReport(['main/acc', 'validation/main/acc'],
                              'epoch', file_name='acc.png'))

    # Save best models
    def torch_save(path, _):
        # store both the state dict and the whole (unwrapped) module
        if ngpu > 1:
            torch.save(model.module.state_dict(), path)
            torch.save(model.module, path + ".pkl")
        else:
            torch.save(model.state_dict(), path)
            torch.save(model, path + ".pkl")

    trainer.extend(
        extensions.snapshot_object(model, 'model.loss.best',
                                   savefun=torch_save),
        trigger=training.triggers.MinValueTrigger('validation/main/loss'))

    # save the model after each epoch
    trainer.extend(
        extensions.snapshot_object(model, 'model_epoch{.updater.epoch}',
                                   savefun=torch_save),
        trigger=(1, 'epoch'))

    # compare strings by value; identity ("is not") is not guaranteed
    if mtl_mode != 'ctc':
        trainer.extend(
            extensions.snapshot_object(model, 'model.acc.best',
                                       savefun=torch_save),
            trigger=training.triggers.MaxValueTrigger('validation/main/acc'))

    # epsilon decay in the optimizer
    def torch_load(path, obj):
        if ngpu > 1:
            model.module.load_state_dict(torch.load(path))
        else:
            model.load_state_dict(torch.load(path))
        return obj

    if args.opt == 'adadelta':
        if args.criterion == 'acc' and mtl_mode != 'ctc':
            trainer.extend(
                restore_snapshot(model, args.outdir + '/model.acc.best',
                                 load_fn=torch_load),
                trigger=CompareValueTrigger(
                    'validation/main/acc',
                    lambda best_value, current_value: best_value > current_value))
            trainer.extend(
                adadelta_eps_decay(args.eps_decay),
                trigger=CompareValueTrigger(
                    'validation/main/acc',
                    lambda best_value, current_value: best_value > current_value))
        elif args.criterion == 'loss':
            trainer.extend(
                restore_snapshot(model, args.outdir + '/model.loss.best',
                                 load_fn=torch_load),
                trigger=CompareValueTrigger(
                    'validation/main/loss',
                    lambda best_value, current_value: best_value < current_value))
            trainer.extend(
                adadelta_eps_decay(args.eps_decay),
                trigger=CompareValueTrigger(
                    'validation/main/loss',
                    lambda best_value, current_value: best_value < current_value))

    # Write a log of evaluation statistics for each epoch
    trainer.extend(extensions.LogReport(trigger=(100, 'iteration')))
    report_keys = [
        'epoch', 'iteration', 'main/loss', 'main/loss_ctc', 'main/loss_att',
        'validation/main/loss', 'validation/main/loss_ctc',
        'validation/main/loss_att', 'main/acc', 'validation/main/acc',
        'elapsed_time'
    ]
    if args.opt == 'adadelta':
        trainer.extend(
            extensions.observe_value(
                'eps',
                lambda trainer: trainer.updater.get_optimizer('main').
                param_groups[0]["eps"]),
            trigger=(100, 'iteration'))
        report_keys.append('eps')
    trainer.extend(extensions.PrintReport(report_keys),
                   trigger=(100, 'iteration'))

    trainer.extend(extensions.ProgressBar())

    # Run the training
    trainer.run()
def main():
    '''Command-line entry point for recognition

    Parses the options, configures logging and the random seeds, restores
    the model from ``--model-conf`` / ``--model``, decodes every feature
    matrix yielded by the Kaldi ark reader and writes the reference json
    extended with the recognition results to ``--result-label``.
    '''
    parser = argparse.ArgumentParser()
    # general configuration
    parser.add_argument('--gpu', '-g', default='-1', type=str,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--debugmode', default=1, type=int,
                        help='Debugmode')
    parser.add_argument('--seed', default=1, type=int,
                        help='Random seed')
    parser.add_argument('--verbose', '-V', default=1, type=int,
                        help='Verbose option')
    # task related
    parser.add_argument('--recog-feat', type=str, required=True,
                        help='Filename of recognition feature data (Kaldi scp)')
    parser.add_argument('--recog-label', type=str, required=True,
                        help='Filename of recognition label data (json)')
    parser.add_argument('--result-label', type=str, required=True,
                        help='Filename of result label data (json)')
    # model (parameter) related
    parser.add_argument('--model', type=str, required=True,
                        help='Model file parameters to read')
    parser.add_argument('--model-conf', type=str, required=True,
                        help='Model config file')
    # search related
    parser.add_argument('--beam-size', type=int, default=1,
                        help='Beam size')
    parser.add_argument('--penalty', default=0.0, type=float,
                        help='Incertion penalty')
    parser.add_argument('--maxlenratio', default=0.5, type=float,
                        help='Input length ratio to obtain max output length')
    parser.add_argument('--minlenratio', default=0.0, type=float,
                        help='Input length ratio to obtain min output length')
    args = parser.parse_args()

    # logging info
    # exactly one branch must run: with independent "if"s, --verbose 1 also
    # fell into the final else and emitted the "Skip" warning
    log_format = "%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s"
    if args.verbose == 1:
        logging.basicConfig(level=logging.INFO, format=log_format)
    elif args.verbose == 2:
        logging.basicConfig(level=logging.DEBUG, format=log_format)
    else:
        logging.basicConfig(level=logging.WARN, format=log_format)
        logging.warning("Skip DEBUG/INFO messages")

    # display PYTHONPATH (may legitimately be unset)
    logging.info('python path = ' + os.environ.get('PYTHONPATH', '(None)'))

    # display chainer version
    logging.info('chainer version = ' + chainer.__version__)

    # seed setting (chainer seed may not need it)
    nseed = args.seed
    random.seed(nseed)
    np.random.seed(nseed)
    os.environ["CHAINER_SEED"] = str(nseed)
    logging.info('chainer seed = ' + os.environ['CHAINER_SEED'])

    # read training config
    # NOTE: pickle.load can execute arbitrary code stored in the file;
    # only point --model-conf at files from a trusted source.
    with open(args.model_conf, "rb") as f:
        logging.info('reading a model config file from' + args.model_conf)
        idim, odim, train_args = pickle.load(f)

    for key in sorted(vars(args).keys()):
        logging.info('ARGS: ' + key + ': ' + str(vars(args)[key]))

    # specify model architecture
    logging.info('reading model parameters from' + args.model)
    e2e = E2E(idim, odim, train_args)
    model = Loss(e2e, train_args.mtlalpha)

    def cpu_loader(storage, location):
        # keep every tensor on the CPU regardless of where it was saved
        return storage

    model.load_state_dict(torch.load(args.model, map_location=cpu_loader))

    # prepare Kaldi reader
    reader = kaldi_io_py.read_mat_ark(args.recog_feat)

    # read json data
    with open(args.recog_label, 'rb') as f:
        recog_json = json.load(f)['utts']

    new_json = {}
    for name, feat in reader:
        y_hat = e2e.recognize(feat, args, train_args.char_list)
        y_true = [int(idx) for idx in recog_json[name]['tokenid'].split()]

        # print out decoding result
        seq_hat = [train_args.char_list[int(idx)] for idx in y_hat]
        seq_true = [train_args.char_list[int(idx)] for idx in y_true]
        seq_hat_text = "".join(seq_hat).replace('<space>', ' ')
        seq_true_text = "".join(seq_true).replace('<space>', ' ')
        # pass the text as a logging argument: a literal '%' inside a
        # transcript must not be interpreted as a format directive
        logging.info("groundtruth[%s]: %s", name, seq_true_text)
        logging.info("prediction [%s]: %s", name, seq_hat_text)

        # copy old json info
        new_json[name] = recog_json[name]

        # added recognition results to json
        logging.debug("dump token id")
        # TODO(karita) make consistent to chainer as idx[0] not idx
        new_json[name]['rec_tokenid'] = " ".join([str(idx) for idx in y_hat])
        logging.debug("dump token")
        new_json[name]['rec_token'] = " ".join(seq_hat)
        logging.debug("dump text")
        new_json[name]['rec_text'] = seq_hat_text

    # TODO fix character coding problems when saving it
    with open(args.result_label, 'wb') as f:
        f.write(json.dumps({'utts': new_json}, indent=4).encode('utf_8'))
# print/plot stats to args.outdir/results
epoch_stats = {
    "loss": loss,
    "acc": acc,
    "loss_ctc": loss_ctc,
    "loss_att": loss_att,
}
epoch_stats[opt_key] = get_opt_param()
valid_result.report(epoch_stats)

# save/load model
valid_avg = valid_result.average()

# keep the best-loss snapshot up to date
loss_got_better = best["loss"] > valid_avg["loss"]
if loss_got_better:
    best["loss"] = valid_avg["loss"]
    torch.save(model.state_dict(), args.outdir + "/model.loss.best")

# keep the best-accuracy snapshot up to date
acc_got_better = best["acc"] < valid_avg["acc"]
if acc_got_better:
    best["acc"] = valid_avg["acc"]
    torch.save(model.state_dict(), args.outdir + "/model.acc.best")

# the run degraded when the metric selected by args.criterion did not improve
degrade = ((args.criterion == "loss" and not loss_got_better) or
           (args.criterion == "acc" and not acc_got_better))

if degrade:
    # shrink eps (adadelta) or lr (other optimizers) and roll the model
    # back to the best snapshot for the selected criterion
    key = "eps" if args.opt == "adadelta" else "lr"
    for group in optimizer.param_groups:
        group[key] *= args.eps_decay
    best_snapshot = args.outdir + "/model." + args.criterion + ".best"
    model.load_state_dict(torch.load(best_snapshot))
def recog(args):
    '''Run recognition

    Rebuilds the E2E model from the pickled training config, loads its
    parameters (saved with or without DataParallel), optionally attaches a
    character RNN language model, decodes every feature matrix yielded by
    the Kaldi ark reader and writes the reference json extended with the
    1-best (and, for beam search, n-best) results to ``args.result_label``.

    :param Namespace args: program arguments. Attributes read here:
        seed, model_conf, model, rnnlm, recog_feat, recog_label, beam_size,
        result_label. The whole namespace is also forwarded to
        ``e2e.recognize``.
    '''
    # seed setting
    torch.manual_seed(args.seed)

    # read training config
    # NOTE: pickle.load can execute arbitrary code stored in the file;
    # only point --model-conf at files from a trusted source.
    with open(args.model_conf, "rb") as f:
        logging.info('reading a model config file from' + args.model_conf)
        idim, odim, train_args = pickle.load(f)

    for key in sorted(vars(args).keys()):
        logging.info('ARGS: ' + key + ': ' + str(vars(args)[key]))

    # specify model architecture
    logging.info('reading model parameters from' + args.model)
    e2e = E2E(idim, odim, train_args)
    model = Loss(e2e, train_args.mtlalpha)

    def cpu_loader(storage, location):
        # keep every tensor on the CPU regardless of where it was saved
        return storage

    def remove_dataparallel(state_dict):
        # Strip the "module." prefix that DataParallel adds, but keep keys
        # that never had it so single-GPU checkpoints load as well.
        from collections import OrderedDict
        new_state_dict = OrderedDict()
        for k, v in state_dict.items():
            if k.startswith("module."):
                k = k[7:]
            new_state_dict[k] = v
        return new_state_dict

    model.load_state_dict(
        remove_dataparallel(torch.load(args.model, map_location=cpu_loader)))

    # read rnnlm
    if args.rnnlm:
        rnnlm = lm_pytorch.ClassifierWithState(
            lm_pytorch.RNNLM(len(train_args.char_list), 650))
        rnnlm.load_state_dict(torch.load(args.rnnlm, map_location=cpu_loader))
        # inference only: switch off training-time behaviour such as dropout
        rnnlm.eval()
    else:
        rnnlm = None

    # prepare Kaldi reader
    reader = kaldi_io_py.read_mat_ark(args.recog_feat)

    # read json data
    with open(args.recog_label, 'rb') as f:
        recog_json = json.load(f)['utts']

    new_json = {}
    for name, feat in reader:
        nbest_hyps = e2e.recognize(feat, args, train_args.char_list, rnnlm=rnnlm)

        # get 1best and remove sos
        y_hat = nbest_hyps[0]['yseq'][1:]
        y_true = [int(idx) for idx in recog_json[name]['tokenid'].split()]

        # print out decoding result
        seq_hat = [train_args.char_list[int(idx)] for idx in y_hat]
        seq_true = [train_args.char_list[int(idx)] for idx in y_true]
        seq_hat_text = "".join(seq_hat).replace('<space>', ' ')
        seq_true_text = "".join(seq_true).replace('<space>', ' ')
        # pass the text as a logging argument: a literal '%' inside a
        # transcript must not be interpreted as a format directive
        logging.info("groundtruth[%s]: %s", name, seq_true_text)
        logging.info("prediction [%s]: %s", name, seq_hat_text)

        # copy old json info
        new_json[name] = recog_json[name]

        # added recognition results to json
        logging.debug("dump token id")
        # TODO(karita) make consistent to chainer as idx[0] not idx
        new_json[name]['rec_tokenid'] = " ".join([str(idx) for idx in y_hat])
        logging.debug("dump token")
        new_json[name]['rec_token'] = " ".join(seq_hat)
        logging.debug("dump text")
        new_json[name]['rec_text'] = seq_hat_text

        # add n-best recognition results with scores
        if args.beam_size > 1 and len(nbest_hyps) > 1:
            for i, hyp in enumerate(nbest_hyps):
                y_hat = hyp['yseq'][1:]
                seq_hat = [train_args.char_list[int(idx)] for idx in y_hat]
                seq_hat_text = "".join(seq_hat).replace('<space>', ' ')
                suffix = '[' + '{:05d}'.format(i) + ']'
                new_json[name]['rec_tokenid' + suffix] = \
                    " ".join([str(idx) for idx in y_hat])
                new_json[name]['rec_token' + suffix] = " ".join(seq_hat)
                new_json[name]['rec_text' + suffix] = seq_hat_text
                new_json[name]['score' + suffix] = hyp['score']

    # TODO(watanabe) fix character coding problems when saving it
    with open(args.result_label, 'wb') as f:
        f.write(json.dumps({'utts': new_json}, indent=4,
                           sort_keys=True).encode('utf_8'))
def recog(args):
    '''Run recognition

    Rebuilds the E2E model from the pickled training config, loads its
    parameters, decodes every feature matrix yielded by the Kaldi ark
    reader and writes the reference json extended with the recognition
    results to ``args.result_label``. RNNLM integration is not available
    in this version; ``args.rnnlm`` only triggers a warning.

    :param Namespace args: program arguments. Attributes read here:
        seed, model_conf, model, rnnlm, recog_feat, recog_label,
        result_label. The whole namespace is also forwarded to
        ``e2e.recognize``.
    '''
    # seed setting
    torch.manual_seed(args.seed)

    # read training config
    # NOTE: pickle.load can execute arbitrary code stored in the file;
    # only point --model-conf at files from a trusted source.
    with open(args.model_conf, "rb") as f:
        logging.info('reading a model config file from' + args.model_conf)
        idim, odim, train_args = pickle.load(f)

    for key in sorted(vars(args).keys()):
        logging.info('ARGS: ' + key + ': ' + str(vars(args)[key]))

    # specify model architecture
    logging.info('reading model parameters from' + args.model)
    e2e = E2E(idim, odim, train_args)
    model = Loss(e2e, train_args.mtlalpha)

    def cpu_loader(storage, location):
        # keep every tensor on the CPU regardless of where it was saved
        return storage

    model.load_state_dict(torch.load(args.model, map_location=cpu_loader))

    # read rnnlm
    if args.rnnlm:
        logging.warning("rnnlm integration is not implemented in the pytorch backend")

    # prepare Kaldi reader
    reader = kaldi_io_py.read_mat_ark(args.recog_feat)

    # read json data
    with open(args.recog_label, 'rb') as f:
        recog_json = json.load(f)['utts']

    new_json = {}
    for name, feat in reader:
        y_hat = e2e.recognize(feat, args, train_args.char_list)
        y_true = [int(idx) for idx in recog_json[name]['tokenid'].split()]

        # print out decoding result
        seq_hat = [train_args.char_list[int(idx)] for idx in y_hat]
        seq_true = [train_args.char_list[int(idx)] for idx in y_true]
        seq_hat_text = "".join(seq_hat).replace('<space>', ' ')
        seq_true_text = "".join(seq_true).replace('<space>', ' ')
        # pass the text as a logging argument: a literal '%' inside a
        # transcript must not be interpreted as a format directive
        logging.info("groundtruth[%s]: %s", name, seq_true_text)
        logging.info("prediction [%s]: %s", name, seq_hat_text)

        # copy old json info
        new_json[name] = recog_json[name]

        # added recognition results to json
        logging.debug("dump token id")
        # TODO(karita) make consistent to chainer as idx[0] not idx
        new_json[name]['rec_tokenid'] = " ".join([str(idx) for idx in y_hat])
        logging.debug("dump token")
        new_json[name]['rec_token'] = " ".join(seq_hat)
        logging.debug("dump text")
        new_json[name]['rec_text'] = seq_hat_text

    # TODO(watanabe) fix character coding problems when saving it
    with open(args.result_label, 'wb') as f:
        f.write(json.dumps({'utts': new_json}, indent=4).encode('utf_8'))