def eval(epoch):
    pbar = tqdm(total=len(devset))
    losses = []
    is_new_epoch = 0
    step = 0
    while True:
        batch, is_new_epoch = devset.next()
        if is_new_epoch:
            break
        xs, ys, xlens = batch['xs'], batch['ys'], batch['xlens']
        xs = [stack_frame(x, args.n_stack, args.n_skip) for x in xs]
        xs = [np2tensor(x).float() for x in xs]
        xlen = torch.IntTensor([len(x) for x in xs])
        xs = pad_list(xs, 0.0).cuda()
        _ys = [np2tensor(np.fromiter(y, dtype=np.int64), -1) for y in ys]
        ys_out_pad = pad_list(_ys, 0).long().cuda()
        ylen = np2tensor(np.fromiter([y.size(0) for y in _ys], dtype=np.int32))

        model.eval()
        loss = model(xs, ys_out_pad, xlen, ylen)
        loss = float(loss.data) * len(xlen)
        losses.append(loss)
        step += 1
        # //TODO vishay un-hardcode the batch size
        pbar.update(len(batch['xs']))
    pbar.close()

    # Reset data counters
    devset.reset()
    return sum(losses) / len(devset)  # , wer, cer

def eval(model):
    devset = Dataset(corpus='english',
                     tsv_path=orgs['paths']['dev_tsv'],
                     dict_path=orgs['paths']['dict'],
                     unit='wp',
                     wp_model=orgs['paths']['wp_model'],
                     batch_size=args.batch_size,  # * args.n_gpus,
                     n_epochs=args.epochs,
                     min_n_frames=40,
                     max_n_frames=2000,
                     sort_by='input',
                     short2long=True,
                     sort_stop_epoch=100,
                     dynamic_batching=True,
                     subsample_factor=1,
                     discourse_aware=False,
                     skip_thought=False,
                     offset=0,
                     epoch=0)
    pbar = tqdm(total=len(devset))
    losses = []
    is_new_epoch = 0
    step = 0
    while True:
        batch, is_new_epoch = devset.next()
        if is_new_epoch:
            break
        utt = batch['utt_ids']
        xs, ys, xlens = batch['xs'], batch['ys'], batch['xlens']
        xs = [stack_frame(x, args.n_stack, args.n_skip) for x in xs]
        xs = [np2tensor(x).float() for x in xs]
        xlen = torch.IntTensor([len(x) for x in xs])
        xs = pad_list(xs, 0.0).cuda()
        _ys = [np2tensor(np.fromiter(y, dtype=np.int64), -1) for y in ys]
        ys_out_pad = pad_list(_ys, 0).long().cuda()
        ylen = np2tensor(np.fromiter([y.size(0) for y in _ys], dtype=np.int32))

        model.eval()
        loss = model(xs, ys_out_pad, xlen, ylen)
        loss = float(loss.sum().data) * len(xlen)
        losses.append(loss)
        step += 1
        # //TODO vishay un-hardcode the batch size
        pbar.update(len(batch['xs']))
    pbar.close()

    # Reset data counters
    devset.reset()
    return sum(losses) / len(devset)  # , wer, cer

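
# A minimal sketch (assumption, not part of the original script) of how the `orgs`
# config referenced in eval() and train() below could be loaded. `orgs` is used but
# never defined in this excerpt; the code only requires a nested dict with these
# path keys. The file name 'config.yaml' and the use of PyYAML are hypothetical.
def load_orgs_config(path='config.yaml'):
    """Load the path configuration used by eval()/train().

    Expected shape:
        {'paths': {'train_tsv': ..., 'dev_tsv': ..., 'dict': ..., 'wp_model': ...}}
    """
    import yaml  # assumption: the config is stored as YAML
    with open(path, encoding='utf-8') as f:
        return yaml.safe_load(f)
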
def train():
    def adjust_learning_rate(optimizer, lr):
        """Sets the learning rate to the initial LR decayed by 10 every 30 epochs"""
        # lr = args.lr * (0.1 ** (epoch // 30))
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr

    def add_noise(x):
        dim = x.shape[-1]
        noise = torch.normal(torch.zeros(dim), 0.075)
        if x.is_cuda:
            noise = noise.cuda()
        x.data += noise

    prev_loss = 2000
    best_model = None
    lr = args.lr
    for epoch in range(1, args.epochs):
        totloss = 0
        losses = []
        start_time = time.time()
        # for i, (xs, ys, xlen, ylen) in enumerate(trainset):
        step = 0
        is_new_epoch = 0
        while True:
            batch, is_new_epoch = trainset.next()
            if is_new_epoch:
                break
            xs, ys, xlens = batch['xs'], batch['ys'], batch['xlens']
            xs = [np2tensor(x).float() for x in batch['xs']]
            xlen = torch.IntTensor([len(x) for x in batch['xs']])
            xs = pad_list(xs, 0.0).cuda()
            _ys = [np2tensor(np.fromiter(y, dtype=np.int64), -1) for y in ys]
            ys_out_pad = pad_list(_ys, 0).long().cuda()
            ylen = np2tensor(
                np.fromiter([y.size(0) for y in _ys], dtype=np.int32))
            # xs = Variable(torch.FloatTensor(xs)).cuda()
            if args.cuda:
                xs = xs.cuda()
            if args.noise:
                add_noise(xs)
            # ys = Variable(torch.LongTensor(ys)).cuda()
            # xlen = Variable(torch.IntTensor(xlen)); ylen = Variable(torch.IntTensor(ylen))

            model.train()
            optimizer.zero_grad()
            loss = model(xs, ys_out_pad, xlen, ylen)
            loss.backward()
            loss = float(loss.data) * len(xlen)
            totloss += loss
            losses.append(loss)
            if args.gradclip:
                grad_norm = nn.utils.clip_grad_norm_(model.parameters(), 200)
            optimizer.step()
            step += 1
            # //TODO vishay un-hardcode the batch size
            # print(step, '/68k')
            if step % args.log_interval == 0 and step > 0:
                loss = totloss / args.batch_size / args.log_interval
                logging.info('[Epoch %d Batch %d] train_loss %.2f' %
                             (epoch, step, loss))
                totloss = 0
        trainset.reset()
        losses = sum(losses) / len(trainset)
        # val_l, wer, cer = eval(epoch)
        val_l = eval(epoch)
        # logging.info('[Epoch %d] time cost %.2fs, train loss %.2f; cv loss %.2f; wer %.2f ; cer %.2f ; lr %.3e' % (
        #     epoch, time.time() - start_time, losses, val_l, wer, cer, lr))
        logging.info(
            '[Epoch %d] time cost %.2fs, train loss %.2f; cv loss %.2f; lr %.3e'
            % (epoch, time.time() - start_time, losses, val_l, lr))

        if val_l < prev_loss:
            prev_loss = val_l
            best_model = '{}/params_epoch{:02d}_tr{:.2f}_cv{:.2f}'.format(
                args.out, epoch, losses, val_l)
            torch.save(model.state_dict(), best_model)
        else:
            torch.save(
                model.state_dict(),
                '{}/params_epoch{:02d}_tr{:.2f}_cv{:.2f}_rejected'.format(
                    args.out, epoch, losses, val_l))
            model.load_state_dict(torch.load(best_model))
            if args.cuda:
                model.cuda()
            if args.schedule:
                lr /= 2
                adjust_learning_rate(optimizer, lr)

def eval(epoch):
    recog_dir = args.out
    ref_trn_save_path = recog_dir + '/ref_epoch_' + str(epoch) + '.trn'
    hyp_trn_save_path = recog_dir + '/hyp_epoch_' + str(epoch) + '.trn'

    wer, cer = 0, 0
    n_sub_w, n_ins_w, n_del_w = 0, 0, 0
    n_sub_c, n_ins_c, n_del_c = 0, 0, 0
    n_word, n_char = 0, 0

    pbar = tqdm(total=len(devset))
    f_hyp = open(hyp_trn_save_path, 'w')
    f_ref = open(ref_trn_save_path, 'w')
    losses = []
    is_new_epoch = 0
    # for xs, ys, xlen, ylen in devset:
    step = 0
    while True:
        batch, is_new_epoch = devset.next()
        # if is_new_epoch:
        #     break
        xs, ys, xlens = batch['xs'], batch['ys'], batch['xlens']
        xs = [np2tensor(x).float() for x in batch['xs']]
        xlen = torch.IntTensor([len(x) for x in batch['xs']])
        xs = pad_list(xs, 0.0).cuda()
        _ys = [np2tensor(np.fromiter(y, dtype=np.int64), -1) for y in ys]
        ys_out_pad = pad_list(_ys, 0).long().cuda()
        ylen = np2tensor(np.fromiter([y.size(0) for y in _ys], dtype=np.int32))
        # xs = Variable(torch.FloatTensor(xs), volatile=True).cuda()
        # ys = Variable(torch.LongTensor(ys), volatile=True).cuda()
        # xlen = Variable(torch.IntTensor(xlen)); ylen = Variable(torch.IntTensor(ylen))

        model.eval()
        # logging.info('================== Evaluation Mode =================')
        loss = model(xs, ys_out_pad, xlen, ylen)
        loss = float(loss.data) * len(xlen)
        losses.append(loss)
        step += 1
        # //TODO vishay un-hardcode the batch size

        best_hyps_id, _ = model.greedy_decode(xs)
        for b in range(len(batch['xs'])):
            ref = batch['text'][b]
            hyp = devset.idx2token[0](best_hyps_id[b])
            hyp = removeDuplicates(hyp)

            # Write to trn
            utt_id = str(batch['utt_ids'][b])
            speaker = str(batch['speakers'][b]).replace('-', '_')
            if hyp is None:
                hyp = "none"
            f_ref.write(ref + ' (' + speaker + '-' + utt_id + ')\n')
            f_hyp.write(hyp + ' (' + speaker + '-' + utt_id + ')\n')
            logging.info('utt-id: %s' % utt_id)
            logging.info('Ref: %s' % ref)
            logging.info('Hyp: %s' % hyp)
            logging.info('-' * 150)

            if 'char' in devset.unit:
                # //TODO this is only for char unit
                # Compute WER
                wer_b, sub_b, ins_b, del_b = compute_wer(ref=ref.split(' '),
                                                         hyp=hyp.split(' '),
                                                         normalize=False)
                wer += wer_b
                n_sub_w += sub_b
                n_ins_w += ins_b
                n_del_w += del_b
                n_word += len(ref.split(' '))
                # Compute CER
                cer_b, sub_b, ins_b, del_b = compute_wer(ref=list(ref),
                                                         hyp=list(hyp),
                                                         normalize=False)
                cer += cer_b
                n_sub_c += sub_b
                n_ins_c += ins_b
                n_del_c += del_b
                n_char += len(ref)

        pbar.update(len(batch['xs']))
        if is_new_epoch:
            break

    pbar.close()
    f_hyp.close()
    f_ref.close()

    # Reset data counters
    devset.reset()

    if 'char' in devset.unit:
        wer /= n_word
        n_sub_w /= n_word
        n_ins_w /= n_word
        n_del_w /= n_word
    else:
        wer = n_sub_w = n_ins_w = n_del_w = 0
    cer /= n_char
    n_sub_c /= n_char
    n_ins_c /= n_char
    n_del_c /= n_char

    logging.info('WER (%s): %.2f %%' % (devset.set, wer))
    logging.info('SUB: %.2f / INS: %.2f / DEL: %.2f' %
                 (n_sub_w, n_ins_w, n_del_w))
    logging.info('CER (%s): %.2f %%' % (devset.set, cer))
    logging.info('SUB: %.2f / INS: %.2f / DEL: %.2f' %
                 (n_sub_c, n_ins_c, n_del_c))
    # print(step, '/12k dev')

    return sum(losses) / len(devset), wer, cer

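
# compute_wer() is imported from elsewhere (in neural_sp it lives under
# neural_sp.evaluators). Below is a minimal sketch of an equivalent helper,
# consistent with how it is called above: with normalize=False it returns raw
# counts (total errors, substitutions, insertions, deletions) for one ref/hyp
# pair of token lists. This is an illustrative assumption, not the imported
# implementation.
def compute_wer_sketch(ref, hyp, normalize=False):
    """Levenshtein alignment between token lists `ref` and `hyp`."""
    n, m = len(ref), len(hyp)
    # dist[i][j] = (total errors, subs, ins, dels) aligning ref[:i] with hyp[:j]
    dist = [[(0, 0, 0, 0)] * (m + 1) for _ in range(n + 1)]
    for i in range(1, n + 1):
        dist[i][0] = (i, 0, 0, i)  # deletions only
    for j in range(1, m + 1):
        dist[0][j] = (j, 0, j, 0)  # insertions only
    for i in range(1, n + 1):
        for j in range(1, m + 1):
            if ref[i - 1] == hyp[j - 1]:
                dist[i][j] = dist[i - 1][j - 1]  # match, no cost
            else:
                c_s, s, ins, dl = dist[i - 1][j - 1]
                c_i = dist[i][j - 1]
                c_d = dist[i - 1][j]
                # min over tuples compares total cost first
                dist[i][j] = min(
                    (c_s + 1, s + 1, ins, dl),                       # substitution
                    (c_i[0] + 1, c_i[1], c_i[2] + 1, c_i[3]),        # insertion
                    (c_d[0] + 1, c_d[1], c_d[2], c_d[3] + 1))        # deletion
    err, n_sub, n_ins, n_del = dist[n][m]
    if normalize:
        err = err * 100 / max(n, 1)
    return err, n_sub, n_ins, n_del
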
                   n_epochs=args.epochs,
                   min_n_frames=40,
                   max_n_frames=2000,
                   sort_by='input',
                   short2long=True,
                   sort_stop_epoch=100,
                   dynamic_batching=True,
                   subsample_factor=1,
                   discourse_aware=False,
                   skip_thought=False)
vocab = trainset.vocab

batch, is_new_epoch = trainset.next()
xs = [np2tensor(x).float() for x in batch['xs']]
xlens = torch.IntTensor([len(x) for x in batch['xs']])
xs = pad_list(xs, 0.0)
ys = batch['ys']
_ys = [np2tensor(np.fromiter(y, dtype=np.int64), -1) for y in ys]
# // TODO vishay optimize for gpu
ys_out_pad = pad_list(_ys, 0).long()
ylens = np2tensor(np.fromiter([y.size(0) for y in _ys], dtype=np.int32))

# TODO use config file
model = Transducer(81, vocab, 256, 3, args.dropout, bidirectional=args.bi)
print(model)
for param in model.parameters():
    torch.nn.init.uniform_(param, -0.1, 0.1)
if args.init:
    model.load_state_dict(torch.load(args.init))
if args.initam:
    model.encoder.load_state_dict(torch.load(args.initam))
if args.cuda:
    model.cuda()
optimizer = torch.optim.SGD(filter(lambda p: p.requires_grad,
                                   model.parameters()),
                            lr=args.lr, momentum=.9)

    return stacked_feat


if __name__ == '__main__':
    train_set = Dataset(
        corpus='hindi',
        tsv_path="/home/asir/kaldi/egs/mini_librispeech/s5/data/dataset/train_clean_5_5_wpbpe30000.tsv",
        dict_path="/home/asir/kaldi/egs/mini_librispeech/s5/data/dict/train_clean_5_wpbpe30000.txt",
        unit='wp',
        wp_model="/home/asir/kaldi/egs/mini_librispeech/s5/data/dict/train_clean_5_bpe30000.model",
        batch_size=50,  # * args.n_gpus,
        n_epochs=25,
        min_n_frames=40,
        max_n_frames=2000,
        sort_by='input',
        short2long=True,
        sort_stop_epoch=100,
        dynamic_batching=True,
        subsample_factor=1,
        discourse_aware=False,
        skip_thought=False)

    batch, is_new_epoch = train_set.next()
    xs, ys, xlens = batch['xs'], batch['ys'], batch['xlens']
    xs = [stack_frame(x, 3, 3) for x in xs]
    xs = [np2tensor(x).float() for x in xs]
    xs = pad_list(xs, 0.0)
    print(xs.shape)

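
# The body of stack_frame() is not included in this excerpt; only its final
# `return stacked_feat` appears above. Below is a minimal sketch of a
# frame-stacking/frame-skipping helper consistent with how it is called here:
# a [T, feat_dim] numpy array in, n_stack consecutive frames concatenated along
# the feature axis, every n_skip-th stacked frame kept, so the feature dimension
# becomes feat_dim * n_stack, matching Transducer(81 * args.n_stack, ...) below.
# This is an assumption about the helper, not the original implementation; it
# relies on the module-level `import numpy as np` already used in this file.
def stack_frame_sketch(feat, n_stack, n_skip):
    n_frames, feat_dim = feat.shape
    stacked = []
    for t in range(0, n_frames, n_skip):
        # Take n_stack frames starting at t, padding with the last frame if needed
        window = feat[t:t + n_stack]
        if len(window) < n_stack:
            pad = np.repeat(feat[-1:], n_stack - len(window), axis=0)
            window = np.concatenate([window, pad], axis=0)
        stacked.append(window.reshape(-1))
    stacked_feat = np.stack(stacked, axis=0)  # shape: [ceil(T / n_skip), feat_dim * n_stack]
    return stacked_feat
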
def train():
    # Initialize the model definition and wrap it in DataParallel
    with open(orgs['paths']['dict'], encoding='utf-8') as f:
        lines = f.read().splitlines()
    vocab = len(lines) + 1
    model_base = Transducer(81 * args.n_stack, vocab, 512, 3, 1024, 2,
                            args.dropout, bidirectional=args.bi)
    model = nn.DataParallel(model_base)
    print(model)

    # If training from scratch, log the parameter counts and uniform-init the weights
    if not args.init:
        Trainable, Total = total_parameters(model)
        logging.info("Trainable %.2f M parameters" % (Trainable / 1000000))
        logging.info("Total %.2f M parameters" % (Total / 1000000))
        for param in model.parameters():
            torch.nn.init.uniform_(param, -0.1, 0.1)
    if args.cuda:
        model.cuda()
    optimizer = torch.optim.SGD(filter(lambda p: p.requires_grad,
                                       model.parameters()),
                                lr=args.lr, momentum=.9)

    # If resuming training, load the checkpoint and open the dataset at the saved offset
    if args.init:
        model, optimizer, start_epoch, start_step, start_offset = load_ckp(
            args.init, model, optimizer)
    else:
        start_epoch = 0
        start_step = 0
        start_offset = 0

    trainset = Dataset(corpus='english',
                       tsv_path=orgs['paths']['train_tsv'],
                       dict_path=orgs['paths']['dict'],
                       unit='wp',
                       wp_model=orgs['paths']['wp_model'],
                       batch_size=args.batch_size,  # * args.n_gpus,
                       n_epochs=args.epochs,
                       min_n_frames=40,
                       max_n_frames=2000,
                       sort_by='input',
                       short2long=True,
                       sort_stop_epoch=100,
                       dynamic_batching=True,
                       subsample_factor=1,
                       discourse_aware=False,
                       skip_thought=False,
                       offset=start_offset,
                       epoch=start_epoch)

    def adjust_learning_rate(optimizer, lr):
        """Sets the learning rate to the initial LR decayed by 10 every 30 epochs"""
        # lr = args.lr * (0.1 ** (epoch // 30))
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr

    def add_noise(x):
        dim = x.shape[-1]
        noise = torch.normal(torch.zeros(dim), 0.075)
        if x.is_cuda:
            noise = noise.cuda()
        x.data += noise

    prev_loss = 2000
    best_model = None
    lr = args.lr
    for epoch in range(start_epoch, args.epochs):
        if start_offset > 0 and epoch == start_epoch:
            print('training epoch #' + str(epoch) + ' from #' +
                  str(start_offset) + ' example ...')
        else:
            print('training epoch #' + str(epoch) + ' from start ...')
            start_step = 0
        totloss = 0
        offset = start_offset
        losses = []
        start_time = time.time()
        step = start_step
        is_new_epoch = 0
        tbar = tqdm(total=len(trainset))
        while True:
            batch, is_new_epoch = trainset.next()
            if is_new_epoch:
                break
            xs, ys, xlens = batch['xs'], batch['ys'], batch['xlens']
            xs = [stack_frame(x, args.n_stack, args.n_skip) for x in xs]
            xs = [np2tensor(x).float() for x in xs]
            xlen = torch.IntTensor([len(x) for x in xs])
            xs = pad_list(xs, 0.0).cuda()
            _ys = [np2tensor(np.fromiter(y, dtype=np.int64), -1) for y in ys]
            ys_out_pad = pad_list(_ys, 0).long().cuda()
            ylen = np2tensor(np.fromiter([y.size(0) for y in _ys], dtype=np.int32))
            if args.cuda:
                xs = xs.cuda()
            if args.noise:
                add_noise(xs)

            model.train()
            optimizer.zero_grad()
            loss = model(xs, ys_out_pad, xlen, ylen)
            loss.sum().backward()
            loss = float(loss.sum().data) * len(xlen)
            totloss += loss
            losses.append(loss)
            if args.gradclip:
                grad_norm = nn.utils.clip_grad_norm_(model.parameters(), 200)
            optimizer.step()

            offset += len(batch['xs'])
            step += 1
            # //TODO vishay un-hardcode the batch size
            if step % args.ckpt_interval == 0 and step > 0:
                checkpoint = {'epoch': epoch,
                              'offset': offset,
                              'step': step,
                              'state_dict': model.state_dict(),
                              'optimizer': optimizer.state_dict()}
                if not os.path.exists(os.path.join(args.out, 'ckpt')):
                    os.mkdir(os.path.join(args.out, 'ckpt'))
                save_ckp(checkpoint, os.path.join(args.out, 'ckpt'))
            if step % args.log_interval == 0 and step > 0:
                loss = totloss / args.batch_size / args.log_interval
                logging.info('[Epoch %d Batch %d] train_loss %.2f' %
                             (epoch, step, loss))
                totloss = 0
            tbar.update(len(batch['xs']))
        tbar.close()
        trainset.reset()
        losses = sum(losses) / len(trainset)

        print('evaluating epoch #' + str(epoch) + '...')
        val_l = eval(model)
        logging.info(
            '[Epoch %d] time cost %.2fs, train loss %.2f; cv loss %.2f; lr %.3e'
            % (epoch + args.resume_epoch, time.time() - start_time, losses,
               val_l, lr))

        if val_l < prev_loss:
            prev_loss = val_l
            best_model = 'params_epoch{:02d}_tr{:.2f}_cv{:.2f}'.format(
                epoch + args.resume_epoch, losses, val_l)
            # When checkpointing at the end of an epoch, store epoch + 1 so that
            # start_epoch on resume is the next epoch, and step 0 as the new start_step.
            checkpoint = {'epoch': epoch + 1,
                          'offset': offset,
                          'step': 0,
                          'state_dict': model.state_dict(),
                          'optimizer': optimizer.state_dict()}
            if not os.path.exists(os.path.join(args.out, 'models')):
                os.mkdir(os.path.join(args.out, 'models'))
            save_ckp(checkpoint, os.path.join(args.out, 'models'),
                     best_model=best_model)
            # torch.save(model.state_dict(), best_model)
            # torch.save(model.module.state_dict(), best_model + '_base')
            # The module state dict can be loaded for inference into model_base
            # without wrapping it in DataParallel.
        else:
            rejected_model = 'params_epoch{:02d}_tr{:.2f}_cv{:.2f}_rejected'.format(
                epoch + args.resume_epoch, losses, val_l)
            checkpoint = {'epoch': epoch + 1,
                          'offset': offset,
                          'step': 0,
                          'state_dict': model.state_dict(),
                          'optimizer': optimizer.state_dict()}
            if not os.path.exists(os.path.join(args.out, 'models')):
                os.mkdir(os.path.join(args.out, 'models'))
            save_ckp(checkpoint, os.path.join(args.out, 'models'),
                     best_model=rejected_model)
            print('rejecting this epoch because', val_l, '>', prev_loss,
                  '; loading model:', best_model)
            model, optimizer, _, _, _ = load_ckp(
                os.path.join(args.out, 'models', best_model), model, optimizer)
            if args.cuda:
                model.cuda()
            if args.schedule:
                lr /= 2
                adjust_learning_rate(optimizer, lr)
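

# save_ckp() / load_ckp() are helpers not shown in this excerpt. Below is a minimal
# sketch consistent with how they are called in train(): the checkpoint dict carries
# 'epoch', 'offset', 'step', 'state_dict' and 'optimizer'; save_ckp() takes a target
# directory plus an optional best_model file name; load_ckp() returns
# (model, optimizer, epoch, step, offset). The default file name is an assumption.
def save_ckp_sketch(checkpoint, ckpt_dir, best_model=None):
    fname = best_model if best_model is not None else 'checkpoint'
    torch.save(checkpoint, os.path.join(ckpt_dir, fname))


def load_ckp_sketch(ckpt_path, model, optimizer):
    checkpoint = torch.load(ckpt_path, map_location='cpu')
    model.load_state_dict(checkpoint['state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer'])
    return (model, optimizer, checkpoint['epoch'], checkpoint['step'],
            checkpoint['offset'])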