def train(self, train_dataset, dev_dataset=None):
    """Run the training loop and, when a dev set is given, per-epoch validation."""
    # Transducer targets are trained without <sos>/<eos> tokens.
    include_eos_sos = self.params['model']['type'] != 'transducer'
    train_loader = FeatureLoader(train_dataset, shuffle=self.shuffle, ngpu=self.ngpu,
                                 mode=self.parallel_mode, include_eos_sos=include_eos_sos)
    if dev_dataset is not None:
        dev_loader = FeatureLoader(dev_dataset, shuffle=False, ngpu=self.ngpu,
                                   mode=self.parallel_mode, include_eos_sos=include_eos_sos)

    epochs = self.params['train']['epochs']
    TrainLossNote = Summary()
    DevLossNote = Summary()

    for epoch in range(epochs):
        self.optimizer.epoch()

        if self.parallel_mode == 'ddp':
            # The distributed sampler needs the epoch to reshuffle consistently across ranks.
            train_loader.set_epoch(epoch)
            self.logger.info('Set the epoch of the train sampler to %d' % epoch)

        train_loss = self.train_one_epoch(epoch, train_loader.loader)
        TrainLossNote.update(epoch, train_loss)

        if self.local_rank == 0:
            self.logger.info('-*Train-Epoch-%d/%d*-, AvgLoss:%.5f' % (epoch, epochs, train_loss))
            self.save_model(epoch)
            self.logger.info('Saved the model!')

        if self.is_visual and self.local_rank == 0:
            self.visulizer.add_scalar('train_epoch_loss', train_loss, epoch)

        if dev_dataset is not None:
            dev_loss = self.eval(dev_loader.loader)
            DevLossNote.update(epoch, dev_loss)
            if self.local_rank == 0:
                self.logger.info('-*Eval-Epoch-%d/%d*-, AvgLoss:%.5f' % (epoch, epochs, dev_loss))
            # DevLossNote already contains this epoch's loss, so a strict '<' comparison
            # would never fire; '<=' lets a new best trigger a checkpoint save.
            if dev_loss <= DevLossNote.best()[1] and self.local_rank == 0:
                self.save_model('model.best.pt')
                self.logger.info('Updated the best checkpoint!')

    if self.local_rank == 0:
        self.logger.info('Training Summary:')
        best_t_epoch, best_t_loss = TrainLossNote.best()
        self.logger.info('The model performed best at training epoch %d (Loss:%.5f)!' % (best_t_epoch, best_t_loss))
        if dev_dataset is not None:
            best_e_epoch, best_e_loss = DevLossNote.best()
            self.logger.info('The model performed best at validation epoch %d (Loss:%.5f)!' % (best_e_epoch, best_e_loss))
        if self.is_visual:
            self.visulizer.close()
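# The trainer above relies on a `Summary` helper exposing update(epoch, loss) and
# best(). Its implementation is not part of this excerpt, so the class below is only
# a minimal sketch of what it could look like, assuming best() returns the
# (epoch, loss) pair with the lowest recorded loss.
class Summary(object):
    def __init__(self):
        # One (epoch, loss) entry per call to update().
        self.notes = []

    def update(self, epoch, loss):
        self.notes.append((epoch, float(loss)))

    def best(self):
        # Lowest loss seen so far; assumes update() has been called at least once.
        return min(self.notes, key=lambda note: note[1])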
def main(args):
    # Fix all random seeds for reproducibility.
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    torch.backends.cudnn.deterministic = True

    with open(args.config, 'r') as f:
        params = yaml.load(f, Loader=yaml.FullLoader)

    expdir = os.path.join('egs', params['data']['name'], 'exp', params['train']['save_name'])
    if not os.path.exists(expdir):
        os.makedirs(expdir)
    shutil.copy(args.config, expdir)

    model_type = params['model']['type']
    if model_type == 'transformer':
        model = Transformer(params['model'])
    elif model_type == 'transformer_lm':
        model = TransformerLanguageModel(params['model'])
    else:
        raise NotImplementedError('Unsupported model type: %s' % model_type)

    if args.ngpu >= 1:
        model.cuda()

    # Build the optimizer.
    if params['train']['finetuning']:
        print('Fine-tuning mode: check the learning rate settings!')
        optimizer = FinetuningOptimizer(model)
    else:
        optimizer = TransformerOptimizer(model, params['train'],
                                         model_size=params['model']['d_model'],
                                         parallel_mode=args.parallel_mode)

    trainer = Trainer(params, model=model, optimizer=optimizer, is_visual=True,
                      expdir=expdir, ngpu=args.ngpu, parallel_mode=args.parallel_mode,
                      local_rank=args.local_rank)

    train_loader = FeatureLoader(params, 'train', shuffle=params['train']['shuffle'],
                                 ngpu=args.ngpu, mode=args.parallel_mode)
    # The dev set is only used for evaluation, so it does not need shuffling.
    dev_loader = FeatureLoader(params, 'dev', shuffle=False,
                               ngpu=args.ngpu, mode=args.parallel_mode)

    trainer.train(train_loader=train_loader, dev_loader=dev_loader)
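# The training entry point above reads args.config, args.seed, args.ngpu,
# args.parallel_mode and args.local_rank. The original argument parser is not shown
# in this excerpt, so the block below is only an illustrative sketch with assumed
# flag names and defaults; the real script may define additional options.
if __name__ == '__main__':
    import argparse  # imported here because the excerpt does not show the module's import block

    parser = argparse.ArgumentParser(description='Train a transformer ASR model')
    parser.add_argument('-config', type=str, required=True, help='path to the YAML config file')
    parser.add_argument('-seed', type=int, default=1234)
    parser.add_argument('-ngpu', type=int, default=1, help='number of GPUs (0 for CPU)')
    parser.add_argument('-parallel_mode', type=str, default='dp', choices=['dp', 'ddp'])
    parser.add_argument('-local_rank', type=int, default=0, help='set by the launcher in ddp mode')
    args = parser.parse_args()

    main(args)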
def main(args):
    checkpoint = torch.load(args.load_model)
    if 'params' in checkpoint:
        params = checkpoint['params']
    else:
        assert os.path.isfile(args.config), 'please specify a config file.'
        with open(args.config, 'r') as f:
            params = yaml.load(f, Loader=yaml.FullLoader)

    # Decoding runs on the untouched eval data: no shuffling, no SpecAugment.
    params['data']['shuffle'] = False
    params['data']['spec_augment'] = False
    params['data']['short_first'] = False
    params['data']['batch_size'] = args.batch_size

    expdir = os.path.join('egs', params['data']['name'], 'exp', params['train']['save_name'])
    if args.suffix is None:
        decode_dir = os.path.join(expdir, 'decode_%s' % args.decode_set)
    else:
        decode_dir = os.path.join(expdir, 'decode_%s_%s' % (args.decode_set, args.suffix))

    if not os.path.exists(decode_dir):
        os.makedirs(decode_dir)

    model = Transformer(params['model'])
    model.load_state_dict(checkpoint['model'])
    print('Loaded pre-trained model from %s' % args.load_model)
    model.eval()
    if args.ngpu > 0:
        model.cuda()

    char2unit = load_vocab(params['data']['vocab'])
    unit2char = {i: c for c, i in char2unit.items()}

    dataset = AudioDataset(params['data'], args.decode_set)
    data_loader = FeatureLoader(dataset)

    recognizer = TransformerRecognizer(model, unit2char=unit2char,
                                       beam_width=args.beam_width, max_len=args.max_len,
                                       penalty=args.penalty, lamda=args.lamda, ngpu=args.ngpu)

    totals = len(dataset)
    batch_size = params['data']['batch_size']
    writer = open(os.path.join(decode_dir, 'predict.txt'), 'w')

    for step, (utt_id, batch) in enumerate(data_loader.loader):
        if args.ngpu > 0:
            inputs = batch['inputs'].cuda()
            inputs_length = batch['inputs_length'].cuda()
        else:
            inputs = batch['inputs']
            inputs_length = batch['inputs_length']

        preds = recognizer.recognize(inputs, inputs_length)

        targets = batch['targets']
        targets_length = batch['targets_length']

        for b in range(len(preds)):
            n = step * batch_size + b
            truth = ' '.join([unit2char[i.item()] for i in targets[b][1:targets_length[b]]])
            print('[%d / %d] %s - pred : %s' % (n, totals, utt_id[b], preds[b]))
            print('[%d / %d] %s - truth: %s' % (n, totals, utt_id[b], truth))
            writer.write(utt_id[b] + ' ' + preds[b] + '\n')

    writer.close()
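# load_vocab() is used above to map output units to indices but is not defined in
# this excerpt. The sketch below assumes a plain-text vocabulary with one
# "<unit> <index>" pair per line; the actual file format may differ.
def load_vocab(vocab_file):
    unit2idx = {}
    with open(vocab_file, 'r', encoding='utf-8') as f:
        for line in f:
            parts = line.strip().split()
            if len(parts) == 2:
                unit, idx = parts
                unit2idx[unit] = int(idx)
    return unit2idx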
def main(args):
    checkpoint = torch.load(args.load_model)
    if 'params' in checkpoint:
        params = checkpoint['params']
    else:
        assert os.path.isfile(args.config), 'please specify a config file.'
        with open(args.config, 'r') as f:
            params = yaml.load(f, Loader=yaml.FullLoader)

    # Decoding runs on the untouched eval data: no shuffling, no SpecAugment.
    params['data']['shuffle'] = False
    params['data']['spec_augment'] = False
    params['data']['short_first'] = False
    params['data']['batch_size'] = args.batch_size

    expdir = os.path.join('egs', params['data']['name'], 'exp', params['train']['save_name'])

    decoder_set_name = 'decode_%s' % args.decode_set
    if args.load_language_model is not None:
        decoder_set_name += '_lm_lmw%.2f' % args.lm_weight
    if args.suffix is not None:
        decoder_set_name += '_%s' % args.suffix
    decode_dir = os.path.join(expdir, decoder_set_name)

    if not os.path.exists(decode_dir):
        os.makedirs(decode_dir)

    model = Transformer(params['model'])
    model.load_state_dict(checkpoint['model'])
    print('Loaded pre-trained model from %s' % args.load_model)
    model.eval()
    if args.ngpu > 0:
        model.cuda()

    # Optionally load an external transformer language model for shallow fusion.
    if args.load_language_model is not None:
        lm_chkpt = torch.load(args.load_language_model)
        lm = TransformerLanguageModel(lm_chkpt['params']['model'])
        lm.load_state_dict(lm_chkpt['model'])
        lm.eval()
        if args.ngpu > 0:
            lm.cuda()
        print('Loaded pre-trained transformer language model from %s' % args.load_language_model)
    else:
        lm = None

    char2unit = load_vocab(params['data']['vocab'])
    unit2char = {i: c for c, i in char2unit.items()}

    data_loader = FeatureLoader(params, args.decode_set, is_eval=True)

    recognizer = TransformerRecognizer(model, lm=lm, lm_weight=args.lm_weight,
                                       unit2char=unit2char, beam_width=args.beam_width,
                                       max_len=args.max_len, penalty=args.penalty,
                                       lamda=args.lamda, ngpu=args.ngpu)

    totals = len(data_loader.dataset)
    batch_size = params['data']['batch_size']

    writer = open(os.path.join(decode_dir, 'predict.txt'), 'w')
    writerRef = open(os.path.join(decode_dir, 'reference.txt'), 'w')

    for step, (utt_id, batch) in enumerate(data_loader.loader):
        if args.ngpu > 0:
            inputs = batch['inputs'].cuda()
            inputs_length = batch['inputs_length'].cuda()
        else:
            inputs = batch['inputs']
            inputs_length = batch['inputs_length']

        preds = recognizer.recognize(inputs, inputs_length)

        targets = batch['targets']
        targets_length = batch['targets_length']

        for b in range(len(preds)):
            n = step * batch_size + b
            truth = ' '.join([unit2char[i.item()] for i in targets[b][1:targets_length[b] + 1]])
            print('[%d / %d] %s - pred : %s' % (n, totals, utt_id[b], preds[b]))
            print('[%d / %d] %s - truth: %s' % (n, totals, utt_id[b], truth))

            # Derive the speaker label from the utterance id and write hypotheses and
            # references in trn format: "<text> (<speaker>-<utt_id>)".
            if utt_id[b][7] == '1':
                speaker = 'S1000'
            elif utt_id[b][7] == '2':
                speaker = 'S2000'
            elif utt_id[b][0] == 'O':
                speaker = 'O'
            else:
                speaker = utt_id[b][6:11]
            writer.write('%s (%s-%s)\n' % (preds[b], speaker, utt_id[b]))
            writerRef.write('%s (%s-%s)\n' % (truth, speaker, utt_id[b]))

    writer.close()
    writerRef.close()
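# predict.txt and reference.txt are written in trn format, "<text> (<speaker>-<utt_id>)",
# so they can be scored with standard tools such as sclite. The helper below is only an
# illustrative sketch, not part of the original recipe: it scores the two files directly
# in Python with a plain Levenshtein distance over the space-separated units.
def _edit_distance(ref, hyp):
    # Single-row dynamic-programming Levenshtein distance over token lists.
    dp = list(range(len(hyp) + 1))
    for i in range(1, len(ref) + 1):
        prev, dp[0] = dp[0], i
        for j in range(1, len(hyp) + 1):
            cur = dp[j]
            dp[j] = min(dp[j] + 1,                              # deletion
                        dp[j - 1] + 1,                          # insertion
                        prev + (ref[i - 1] != hyp[j - 1]))      # substitution
            prev = cur
    return dp[-1]


def score_trn(ref_path, hyp_path):
    # e.g. cer = score_trn('reference.txt', 'predict.txt') inside a decode directory.
    def read_trn(path):
        utts = {}
        with open(path, 'r', encoding='utf-8') as f:
            for line in f:
                if not line.strip():
                    continue
                text, utt = line.rsplit('(', 1)
                utts[utt.strip(' )\n')] = text.split()
        return utts

    refs, hyps = read_trn(ref_path), read_trn(hyp_path)
    errors = sum(_edit_distance(refs[u], hyps.get(u, [])) for u in refs)
    total = sum(len(r) for r in refs.values())
    return errors / max(total, 1)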