Example #1
def eval(epoch):

    pbar = tqdm(total=len(devset))
    losses = []
    is_new_epoch = 0
    step = 0
    while True:
        batch, is_new_epoch = devset.next()
        if is_new_epoch:
            break
        xs, ys, xlens = batch['xs'], batch['ys'], batch['xlens']
        xs = [stack_frame(x, args.n_stack, args.n_skip) for x in xs]
        xs = [np2tensor(x).float() for x in xs]
        xlen = torch.IntTensor([len(x) for x in xs])
        xs = pad_list(xs, 0.0).cuda()
        _ys = [np2tensor(np.fromiter(y, dtype=np.int64), -1) for y in ys]
        ys_out_pad = pad_list(_ys, 0).long().cuda()
        ylen = np2tensor(np.fromiter([y.size(0) for y in _ys], dtype=np.int32))
        model.eval()
        with torch.no_grad():  # no gradients are needed for validation
            loss = model(xs, ys_out_pad, xlen, ylen)
        loss = loss.item() * len(xlen)  # weight by batch size; averaged over the dev set below
        losses.append(loss)
        step += 1  # TODO(vishay): un-hardcode the batch size

        pbar.update(len(batch['xs']))
    pbar.close()

    # Reset data counters
    devset.reset()

    return sum(losses) / len(devset)  #, wer, cer
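
The snippets in these examples lean on two small helpers from the surrounding project, np2tensor and pad_list, which are not shown. A minimal sketch of what they are assumed to do (convert a NumPy array to a torch tensor, and zero-pad a list of variable-length tensors into one batch tensor):

import numpy as np
import torch

def np2tensor(array, device_id=-1):
    """Convert a NumPy array to a torch.Tensor; keep it on the CPU when device_id < 0 (assumed behaviour)."""
    tensor = torch.from_numpy(array)
    return tensor if device_id < 0 else tensor.cuda(device_id)

def pad_list(tensors, pad_value=0.0):
    """Pad a list of (T_i, ...) tensors along dim 0 to the longest T_i and stack them into one batch tensor."""
    max_len = max(t.size(0) for t in tensors)
    padded = tensors[0].new_full((len(tensors), max_len) + tuple(tensors[0].size()[1:]), pad_value)
    for i, t in enumerate(tensors):
        padded[i, :t.size(0)] = t
    return padded
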
Example #2
def eval(model):
    devset = Dataset(corpus='english',
                     tsv_path=orgs['paths']['dev_tsv'],
                     dict_path=orgs['paths']['dict'],
                     unit='wp',
                     wp_model=orgs['paths']['wp_model'],
                     batch_size=args.batch_size,  # * args.n_gpus,
                     n_epochs=args.epochs,
                     min_n_frames=40,
                     max_n_frames=2000,
                     sort_by='input',
                     short2long=True,
                     sort_stop_epoch=100,
                     dynamic_batching=True,
                     subsample_factor=1,
                     discourse_aware=False,
                     skip_thought=False,
                     offset=0,
                     epoch=0)

    pbar = tqdm(total=len(devset))
    losses = []
    is_new_epoch = 0
    step = 0
    while True:
        batch, is_new_epoch = devset.next()
        if is_new_epoch:
            break
        utt = batch['utt_ids']
        xs, ys, xlens = batch['xs'], batch['ys'], batch['xlens']
        xs = [stack_frame(x, args.n_stack, args.n_skip) for x in xs]
        xs = [np2tensor(x).float() for x in xs]
        xlen = torch.IntTensor([len(x) for x in xs])
        xs = pad_list(xs, 0.0).cuda()
        _ys = [np2tensor(np.fromiter(y, dtype=np.int64), -1) for y in ys]
        ys_out_pad = pad_list(_ys, 0).long().cuda()
        ylen = np2tensor(np.fromiter([y.size(0) for y in _ys], dtype=np.int32))
        model.eval()
        with torch.no_grad():  # no gradients are needed for validation
            loss = model(xs, ys_out_pad, xlen, ylen)
        loss = loss.sum().item() * len(xlen)  # sum per-GPU losses (DataParallel), weight by batch size
        losses.append(loss)
        step += 1  # TODO(vishay): un-hardcode the batch size

        pbar.update(len(batch['xs']))
    pbar.close()

    # Reset data counters
    devset.reset()

    return sum(losses) / len(devset) #, wer, cer
Example #3
def train():
    def adjust_learning_rate(optimizer, lr):
        """Sets the learning rate to the initial LR decayed by 10 every 30 epochs"""
        # lr = args.lr * (0.1 ** (epoch // 30))
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr

    def add_noise(x):
        dim = x.shape[-1]
        noise = torch.normal(torch.zeros(dim), 0.075)
        if x.is_cuda: noise = noise.cuda()
        x.data += noise

    prev_loss = 2000
    best_model = None
    lr = args.lr
    for epoch in range(1, args.epochs):
        totloss = 0
        losses = []
        start_time = time.time()
        # for i, (xs, ys, xlen, ylen) in enumerate(trainset):
        step = 0
        is_new_epoch = 0
        while True:
            batch, is_new_epoch = trainset.next()
            if is_new_epoch:
                break
            xs, ys, xlens = batch['xs'], batch['ys'], batch['xlens']
            xs = [np2tensor(x).float() for x in batch['xs']]
            xlen = torch.IntTensor([len(x) for x in batch['xs']])
            xs = pad_list(xs, 0.0).cuda()
            _ys = [np2tensor(np.fromiter(y, dtype=np.int64), -1) for y in ys]
            ys_out_pad = pad_list(_ys, 0).long().cuda()
            ylen = np2tensor(
                np.fromiter([y.size(0) for y in _ys], dtype=np.int32))
            #   xs = Variable(torch.FloatTensor(xs)).cuda()
            if args.cuda: xs = xs.cuda()
            if args.noise: add_noise(xs)
            #   ys = Variable(torch.LongTensor(ys)).cuda()
            #   xlen = Variable(torch.IntTensor(xlen)); ylen = Variable(torch.IntTensor(ylen))
            model.train()
            optimizer.zero_grad()
            loss = model(xs, ys_out_pad, xlen, ylen)
            loss.backward()
            loss = loss.item() * len(xlen)
            totloss += loss
            losses.append(loss)
            if args.gradclip:
                grad_norm = nn.utils.clip_grad_norm_(model.parameters(), 200)
            optimizer.step()

            step += 1  # TODO(vishay): un-hardcode the batch size
            # print(step, '/68k')
            if step % args.log_interval == 0 and step > 0:
                loss = totloss / args.batch_size / args.log_interval
                logging.info('[Epoch %d Batch %d] train_loss %.2f' %
                             (epoch, step, loss))
                totloss = 0
        trainset.reset()
        losses = sum(losses) / len(trainset)
        #val_l, wer, cer = eval(epoch)
        val_l = eval(epoch)
        # logging.info('[Epoch %d] time cost %.2fs, train loss %.2f; cv loss %.2f; wer %.2f ; cer %.2f ; lr %.3e' % (
        #     epoch, time.time() - start_time, losses, val_l, wer, cer, lr
        # ))
        logging.info(
            '[Epoch %d] time cost %.2fs, train loss %.2f; cv loss %.2f; lr %.3e'
            % (epoch, time.time() - start_time, losses, val_l, lr))
        if val_l < prev_loss:
            prev_loss = val_l
            best_model = '{}/params_epoch{:02d}_tr{:.2f}_cv{:.2f}'.format(
                args.out, epoch, losses, val_l)
            torch.save(model.state_dict(), best_model)
        else:
            torch.save(
                model.state_dict(),
                '{}/params_epoch{:02d}_tr{:.2f}_cv{:.2f}_rejected'.format(
                    args.out, epoch, losses, val_l))
            model.load_state_dict(torch.load(best_model))
            if args.cuda: model.cuda()
            if args.schedule:
                lr /= 2
                adjust_learning_rate(optimizer, lr)
Example #4
def eval(epoch):
    recog_dir = args.out
    ref_trn_save_path = recog_dir + '/ref_epoch_' + str(epoch) + '.trn'
    hyp_trn_save_path = recog_dir + '/hyp_epoch_' + str(epoch) + '.trn'
    wer, cer = 0, 0
    n_sub_w, n_ins_w, n_del_w = 0, 0, 0
    n_sub_c, n_ins_c, n_del_c = 0, 0, 0
    n_word, n_char = 0, 0
    pbar = tqdm(total=len(devset))
    f_hyp = open(hyp_trn_save_path, 'w')
    f_ref = open(ref_trn_save_path, 'w')
    losses = []
    is_new_epoch = 0
    #    for xs, ys, xlen, ylen in devset:
    step = 0
    while True:
        batch, is_new_epoch = devset.next()
        #        if is_new_epoch:
        #            break
        xs, ys, xlens = batch['xs'], batch['ys'], batch['xlens']
        xs = [np2tensor(x).float() for x in batch['xs']]
        xlen = torch.IntTensor([len(x) for x in batch['xs']])
        xs = pad_list(xs, 0.0).cuda()
        _ys = [np2tensor(np.fromiter(y, dtype=np.int64), -1) for y in ys]
        ys_out_pad = pad_list(_ys, 0).long().cuda()
        ylen = np2tensor(np.fromiter([y.size(0) for y in _ys], dtype=np.int32))
        # xs = Variable(torch.FloatTensor(xs), volatile=True).cuda()
        # ys = Variable(torch.LongTensor(ys), volatile=True).cuda()
        # xlen = Variable(torch.IntTensor(xlen)); ylen = Variable(torch.IntTensor(ylen))
        model.eval()
        #logging.info('================== Evaluation Mode =================')
        with torch.no_grad():  # no gradients are needed for validation or decoding
            loss = model(xs, ys_out_pad, xlen, ylen)
            best_hyps_id, _ = model.greedy_decode(xs)
        loss = loss.item() * len(xlen)
        losses.append(loss)
        step += 1  # TODO(vishay): un-hardcode the batch size

        for b in range(len(batch['xs'])):
            ref = batch['text'][b]
            hyp = devset.idx2token[0](best_hyps_id[b])
            hyp = removeDuplicates(hyp)
            # Write to trn
            utt_id = str(batch['utt_ids'][b])
            speaker = str(batch['speakers'][b]).replace('-', '_')
            if hyp is None:
                hyp = "none"
            f_ref.write(ref + ' (' + speaker + '-' + utt_id + ')\n')
            f_hyp.write(hyp + ' (' + speaker + '-' + utt_id + ')\n')
            logging.info('utt-id: %s' % utt_id)
            logging.info('Ref: %s' % ref)
            logging.info('Hyp: %s' % hyp)
            logging.info('-' * 150)

            if 'char' in devset.unit:  # TODO: WER is only computed for the char unit
                # Compute WER
                wer_b, sub_b, ins_b, del_b = compute_wer(ref=ref.split(' '),
                                                         hyp=hyp.split(' '),
                                                         normalize=False)
                wer += wer_b
                n_sub_w += sub_b
                n_ins_w += ins_b
                n_del_w += del_b
                n_word += len(ref.split(' '))

            # Compute CER
            cer_b, sub_b, ins_b, del_b = compute_wer(ref=list(ref),
                                                     hyp=list(hyp),
                                                     normalize=False)
            cer += cer_b
            n_sub_c += sub_b
            n_ins_c += ins_b
            n_del_c += del_b
            n_char += len(ref)

        pbar.update(len(batch['xs']))
        if is_new_epoch:
            break

    pbar.close()
    f_hyp.close()
    f_ref.close()

    # Reset data counters
    devset.reset()

    if 'char' in devset.unit:
        wer /= n_word
        n_sub_w /= n_word
        n_ins_w /= n_word
        n_del_w /= n_word
    else:
        wer = n_sub_w = n_ins_w = n_del_w = 0

    cer /= n_char
    n_sub_c /= n_char
    n_ins_c /= n_char
    n_del_c /= n_char

    logging.info('WER (%s): %.2f %%' % (devset.set, wer))
    logging.info('SUB: %.2f / INS: %.2f / DEL: %.2f' %
                 (n_sub_w, n_ins_w, n_del_w))
    logging.info('CER (%s): %.2f %%' % (devset.set, cer))
    logging.info('SUB: %.2f / INS: %.2f / DEL: %.2f' %
                 (n_sub_c, n_ins_c, n_del_c))

    # print(step, '/12k  dev')
    return sum(losses) / len(devset), wer, cer
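
Example #4 accumulates substitution/insertion/deletion counts from a compute_wer helper that is not shown. A minimal edit-distance sketch consistent with that call signature (an assumption, not necessarily the project's implementation):

def compute_wer(ref, hyp, normalize=False):
    """Edit distance between two token sequences.

    Returns (errors, n_sub, n_ins, n_del); when normalize is True the counts
    are divided by len(ref)."""
    n, m = len(ref), len(hyp)
    # dp[i][j] = minimum edits needed to turn ref[:i] into hyp[:j]
    dp = [[0] * (m + 1) for _ in range(n + 1)]
    for i in range(1, n + 1):
        dp[i][0] = i
    for j in range(1, m + 1):
        dp[0][j] = j
    for i in range(1, n + 1):
        for j in range(1, m + 1):
            sub = dp[i - 1][j - 1] + (ref[i - 1] != hyp[j - 1])
            dp[i][j] = min(sub, dp[i - 1][j] + 1, dp[i][j - 1] + 1)
    # Backtrace to split the distance into substitutions, insertions and deletions.
    n_sub = n_ins = n_del = 0
    i, j = n, m
    while i > 0 or j > 0:
        if i > 0 and j > 0 and ref[i - 1] == hyp[j - 1] and dp[i][j] == dp[i - 1][j - 1]:
            i, j = i - 1, j - 1  # match
        elif i > 0 and j > 0 and dp[i][j] == dp[i - 1][j - 1] + 1:
            n_sub += 1
            i, j = i - 1, j - 1  # substitution
        elif j > 0 and dp[i][j] == dp[i][j - 1] + 1:
            n_ins += 1
            j -= 1  # insertion (extra token in hyp)
        else:
            n_del += 1
            i -= 1  # deletion (token missing from hyp)
    err = n_sub + n_ins + n_del
    if normalize and n > 0:
        return err / n, n_sub / n, n_ins / n, n_del / n
    return err, n_sub, n_ins, n_del
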
Example #5
    wp_model="",
    batch_size=args.batch_size,  # * args.n_gpus,
    n_epochs=args.epochs,
    min_n_frames=40,
    max_n_frames=2000,
    sort_by='input',
    short2long=True,
    sort_stop_epoch=100,
    dynamic_batching=True,
    subsample_factor=1,
    discourse_aware=False,
    skip_thought=False)

vocab = trainset.vocab
batch, is_new_epoch = trainset.next()
xs = [np2tensor(x).float() for x in batch['xs']]
xlens = torch.IntTensor([len(x) for x in batch['xs']])
xs = pad_list(xs, 0.0)
ys = batch['ys']
_ys = [np2tensor(np.fromiter(y, dtype=np.int64), -1)
       for y in ys]  # TODO(vishay): optimize for GPU
ys_out_pad = pad_list(_ys, 0).long()
ylens = np2tensor(np.fromiter([y.size(0) for y in _ys], dtype=np.int32))
# TODO use config file
model = Transducer(81, vocab, 256, 3, args.dropout, bidirectional=args.bi)
print(model)
for param in model.parameters():
    torch.nn.init.uniform_(param, -0.1, 0.1)
if args.init: model.load_state_dict(torch.load(args.init))
if args.initam: model.encoder.load_state_dict(torch.load(args.initam))
if args.cuda: model.cuda()
Example #6
    return stacked_feat


if __name__ == '__main__':
    train_set = Dataset(
        corpus='hindi',
        tsv_path="/home/asir/kaldi/egs/mini_librispeech/s5/data/dataset/train_clean_5_5_wpbpe30000.tsv",
        dict_path="/home/asir/kaldi/egs/mini_librispeech/s5/data/dict/train_clean_5_wpbpe30000.txt",
        unit='wp',
        wp_model="/home/asir/kaldi/egs/mini_librispeech/s5/data/dict/train_clean_5_bpe30000.model",
        batch_size=50,  # * args.n_gpus,
        n_epochs=25,
        min_n_frames=40,
        max_n_frames=2000,
        sort_by='input',
        short2long=True,
        sort_stop_epoch=100,
        dynamic_batching=True,
        subsample_factor=1,
        discourse_aware=False,
        skip_thought=False)
    batch, is_new_epoch = train_set.next()
    xs, ys, xlens = batch['xs'], batch['ys'], batch['xlens']
    xs = [stack_frame(x, 3, 3) for x in xs]
    xs = [np2tensor(x).float() for x in xs]
    xs = pad_list(xs, 0.0)
    print(xs.shape)
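
The stack_frame helper, whose tail appears at the top of Example #6, is not shown in full. A minimal frame-stacking/skipping sketch, under the assumption that it stacks n_stack consecutive frames and advances by n_skip frames:

import numpy as np

def stack_frame(feat, n_stack, n_skip):
    """Stack n_stack consecutive frames and keep every n_skip-th position,
    turning a (T, F) feature matrix into roughly (T / n_skip, F * n_stack)."""
    n_frames, feat_dim = feat.shape
    stacked = []
    for t in range(0, n_frames, n_skip):
        window = feat[t:t + n_stack]
        if window.shape[0] < n_stack:
            # zero-pad the last, incomplete window
            pad = np.zeros((n_stack - window.shape[0], feat_dim), dtype=feat.dtype)
            window = np.concatenate([window, pad], axis=0)
        stacked.append(window.reshape(-1))
    return np.stack(stacked, axis=0)

With n_stack = n_skip = 3, as in the __main__ block above, this reduces the frame rate by a factor of three while tripling the feature dimension, which matches the 81 * args.n_stack input size used for the Transducer in Example #7.
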
Example #7
def train():

    ### initialize the model definition and wrap it in DataParallel
    with open(orgs['paths']['dict'], encoding='utf-8') as f:
        lines = f.read().splitlines()

    vocab = len(lines) + 1
    model_base = Transducer(81 * args.n_stack, vocab, 512, 3, 1024, 2, args.dropout, bidirectional=args.bi)
    model = nn.DataParallel(model_base)
    print(model)


    ### if starting training from scratch, log the parameter counts and uniformly initialize the weights
    if not args.init:
        Trainable, Total = total_parameters(model)
        logging.info("Trainable %.2f M parameters" % (Trainable / 1000000))
        logging.info("Total %.2f M parameters" % (Total / 1000000))
        for param in model.parameters():
            torch.nn.init.uniform_(param, -0.1, 0.1)

    if args.cuda: model.cuda()
    optimizer = torch.optim.SGD(filter(lambda p: p.requires_grad, model.parameters()), lr=args.lr, momentum=.9)



    ## if resuming training, load the checkpoint and open the dataset at the saved offset

    if args.init:
        model, optimizer, start_epoch, start_step, start_offset = load_ckp(args.init, model, optimizer)
    else:
        start_epoch = 0
        start_step = 0
        start_offset = 0

    trainset = Dataset(corpus='english',
                       tsv_path=orgs['paths']['train_tsv'],
                       dict_path=orgs['paths']['dict'],
                       unit='wp',
                       wp_model=orgs['paths']['wp_model'],
                       batch_size=args.batch_size,  # * args.n_gpus,
                       n_epochs=args.epochs,
                       min_n_frames=40,
                       max_n_frames=2000,
                       sort_by='input',
                       short2long=True,
                       sort_stop_epoch=100,
                       dynamic_batching=True,
                       subsample_factor=1,
                       discourse_aware=False,
                       skip_thought=False,
                       offset=start_offset,
                       epoch=start_epoch)


    def adjust_learning_rate(optimizer, lr):
        """Sets the learning rate to the initial LR decayed by 10 every 30 epochs"""
        # lr = args.lr * (0.1 ** (epoch // 30))
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr

    def add_noise(x):
        dim = x.shape[-1]
        noise = torch.normal(torch.zeros(dim), 0.075)
        if x.is_cuda: noise = noise.cuda()
        x.data += noise

    prev_loss = 2000
    best_model = None

    lr = args.lr
    for epoch in range(start_epoch, args.epochs):
        if start_offset > 0 and epoch == start_epoch:
            print('training epoch #'+str(epoch)+' from #'+str(start_offset)+' example ...')
        else:
            print('training epoch #'+str(epoch)+' from start ...')
            start_step = 0
        totloss = 0
        offset = start_offset
        losses = []
        start_time = time.time()
        step = start_step
        is_new_epoch = 0
        tbar = tqdm(total=len(trainset))
        while True:
            batch, is_new_epoch = trainset.next()
            if is_new_epoch:
                break
            xs, ys, xlens = batch['xs'], batch['ys'], batch['xlens']
            xs = [stack_frame(x, args.n_stack, args.n_skip) for x in xs]
            xs = [np2tensor(x).float() for x in xs]
            xlen = torch.IntTensor([len(x) for x in xs])
            xs = pad_list(xs, 0.0).cuda()
            _ys = [np2tensor(np.fromiter(y, dtype=np.int64), -1) for y in ys]
            ys_out_pad = pad_list(_ys, 0).long().cuda()
            ylen = np2tensor(np.fromiter([y.size(0) for y in _ys], dtype=np.int32))
            if args.cuda: xs = xs.cuda()
            if args.noise: add_noise(xs)
            model.train()
            optimizer.zero_grad()
            loss = model(xs, ys_out_pad, xlen, ylen).sum()  # sum the per-GPU losses returned by DataParallel
            loss.backward()
            loss = loss.item() * len(xlen)
            totloss += loss
            losses.append(loss)
            if args.gradclip: grad_norm = nn.utils.clip_grad_norm_(model.parameters(), 200)
            optimizer.step()
            offset += len(batch['xs'])
            step += 1  # TODO(vishay): un-hardcode the batch size
            if step % args.ckpt_interval == 0 and step > 0:
                checkpoint = {'epoch': epoch, 'offset': offset, 'step': step,
                              'state_dict': model.state_dict(), 'optimizer': optimizer.state_dict()}
                if not os.path.exists(os.path.join(args.out, 'ckpt')):
                    os.mkdir(os.path.join(args.out, 'ckpt'))
                save_ckp(checkpoint, os.path.join(args.out, 'ckpt'))

            if step % args.log_interval == 0 and step > 0:
                loss = totloss / args.batch_size / args.log_interval
                logging.info('[Epoch %d Batch %d] train_loss %.2f' % (epoch, step, loss))
                totloss = 0
            tbar.update(len(batch['xs']))
        tbar.close()
        trainset.reset()
        losses = sum(losses) / len(trainset)
        print('evaluating epoch #'+str(epoch)+'...')
        val_l = eval(model)
        
        logging.info('[Epoch %d] time cost %.2fs, train loss %.2f; cv loss %.2f; lr %.3e' % (
            epoch + args.resume_epoch, time.time() - start_time, losses, val_l, lr
        ))
        if val_l < prev_loss:
            prev_loss = val_l
            best_model = 'params_epoch{:02d}_tr{:.2f}_cv{:.2f}'.format(epoch + args.resume_epoch, losses, val_l)
            ## when checkpointing at the end of an epoch, store epoch + 1 so that start_epoch on reload
            ## is the next epoch, and store step 0 as the new start_step
            checkpoint = {'epoch': epoch + 1, 'offset': offset, 'step': 0,
                          'state_dict': model.state_dict(), 'optimizer': optimizer.state_dict()}
            if not os.path.exists(os.path.join(args.out, 'models')):
                os.mkdir(os.path.join(args.out, 'models'))
            save_ckp(checkpoint, os.path.join(args.out, 'models'), best_model=best_model)

#            torch.save(model.state_dict(), best_model)
#            torch.save(model.module.state_dict(),best_model+'_base')     #think this can be loaded for inference into the model_base without wrapping with data parallel.
        else:
            rejected_model = 'params_epoch{:02d}_tr{:.2f}_cv{:.2f}_rejected'.format(epoch + args.resume_epoch, losses, val_l)
            checkpoint = {'epoch': epoch + 1, 'offset': offset, 'step': 0,
                          'state_dict': model.state_dict(), 'optimizer': optimizer.state_dict()}
            if not os.path.exists(os.path.join(args.out, 'models')):
                os.mkdir(os.path.join(args.out, 'models'))
            save_ckp(checkpoint, os.path.join(args.out, 'models'), best_model=rejected_model)
            print('rejecting this epoch because', val_l, '>', prev_loss, '- reloading', best_model)
            model, optimizer, _, _, _ = load_ckp(os.path.join(args.out, 'models', best_model), model, optimizer)
            if args.cuda: model.cuda()
            if args.schedule:
                lr /= 2
                adjust_learning_rate(optimizer, lr)
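
Example #7 also depends on save_ckp and load_ckp helpers that are not shown. A minimal sketch compatible with the checkpoint dictionaries and the unpacking order used above (an assumption, not the project's actual helpers):

import os
import torch

def save_ckp(checkpoint, ckpt_dir, best_model=None):
    """Write the checkpoint dict to ckpt_dir, using best_model as the file name when given (assumed helper)."""
    name = best_model if best_model is not None else 'ckpt_step{:06d}'.format(checkpoint['step'])
    torch.save(checkpoint, os.path.join(ckpt_dir, name))

def load_ckp(ckpt_path, model, optimizer):
    """Restore model/optimizer state and return the saved bookkeeping counters (assumed helper)."""
    checkpoint = torch.load(ckpt_path, map_location='cpu')
    model.load_state_dict(checkpoint['state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer'])
    return model, optimizer, checkpoint['epoch'], checkpoint['step'], checkpoint['offset']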