Example #1
    def infer(self, model, epoch=0):
        top1 = utils.AverageMeter()
        top5 = utils.AverageMeter()
        data_time = utils.AverageMeter()
        batch_time = utils.AverageMeter()
        model.eval()

        start = time.time()
        prefetcher = data_prefetcher(self.val_data)
        input, target = prefetcher.next()
        step = 0
        while input is not None:
            step += 1
            data_t = time.time() - start
            n = input.size(0)

            logits, logits_aux = model(input)
            prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))

            batch_t = time.time() - start
            top1.update(prec1.item(), n)
            top5.update(prec5.item(), n)
            data_time.update(data_t)
            batch_time.update(batch_t)

            if step % self.report_freq == 0:
                logging.info('Val epoch %03d step %03d | top1_acc %.2f  top5_acc %.2f | batch_time %.3f  data_time %.3f', epoch, step, top1.avg, top5.avg, batch_time.avg, data_time.avg)
            start = time.time()
            input, target = prefetcher.next()

        logging.info('EPOCH%d Valid_acc  top1 %.2f top5 %.2f batch_time %.3f data_time %.3f', epoch, top1.avg, top5.avg, batch_time.avg, data_time.avg)
        return top1.avg, top5.avg, batch_time.avg, data_time.avg
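
Every example on this page relies on a `utils.AverageMeter` helper that is not shown. Below is a minimal sketch of the usual implementation; the optional `name`/`fmt` arguments match the calls in Examples #9 and #13, but the exact code in these projects may differ.

class AverageMeter:
    """Tracks the latest value and the running average of a metric."""

    def __init__(self, name='', fmt=':f'):
        self.name = name
        self.fmt = fmt
        self.reset()

    def reset(self):
        self.val = 0.0
        self.avg = 0.0
        self.sum = 0.0
        self.count = 0

    def update(self, val, n=1):
        # val is the per-batch value; n is the number of samples it covers
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

    def __str__(self):
        fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})'
        return fmtstr.format(**self.__dict__)
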
Example #2
def validate(logger, writer, device, config, valid_loader, model, epoch, cur_step):
    top1 = utils.AverageMeter()
    top5 = utils.AverageMeter()
    losses = utils.AverageMeter()

    model.eval()

    with torch.no_grad():
        for step, (X, y) in enumerate(valid_loader):
            X, y = X.to(device, non_blocking=True), y.to(device, non_blocking=True)
            N = X.size(0)

            logits = model(X)
            loss = model.module.criterion(logits, y)

            prec1, prec5 = utils.accuracy(logits, y, topk=(1, 5))
            losses.update(loss.item(), N)
            top1.update(prec1.item(), N)
            top5.update(prec5.item(), N)

            if step % config.print_freq == 0 or step == len(valid_loader)-1:
                logger.info(
                    "Valid: [{:2d}/{}] Step {:03d}/{:03d} Loss {losses.avg:.3f} "
                    "Prec@(1,5) ({top1.avg:.1%}, {top5.avg:.1%})".format(
                        epoch+1, config.epochs, step, len(valid_loader)-1, losses=losses,
                        top1=top1, top5=top5))

    writer.add_scalar('val/loss', losses.avg, cur_step)
    writer.add_scalar('val/top1', top1.avg, cur_step)
    writer.add_scalar('val/top5', top5.avg, cur_step)

    logger.info("Valid: [{:2d}/{}] Final Prec@1 {:.4%}".format(epoch+1, config.epochs, top1.avg))

    return top1.avg
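
The `utils.accuracy(logits, target, topk=(1, 5))` helper used throughout is the standard top-k accuracy function from the PyTorch ImageNet example; a sketch of what these projects presumably use (returns precision@k in percent):

def accuracy(output, target, topk=(1,)):
    """Compute precision@k for the specified values of k, in percent."""
    maxk = max(topk)
    batch_size = target.size(0)

    # indices of the top-k predictions, shape (maxk, batch)
    _, pred = output.topk(maxk, 1, True, True)
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))

    res = []
    for k in topk:
        correct_k = correct[:k].reshape(-1).float().sum(0)
        res.append(correct_k.mul_(100.0 / batch_size))
    return res
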
Example #3
def train(logger, writer, device, config, train_loader, model, optimizer,
          criterion, epoch):
    top1 = utils.AverageMeter()
    top5 = utils.AverageMeter()
    losses = utils.AverageMeter()

    cur_step = epoch * len(train_loader)
    cur_lr = optimizer.param_groups[0]['lr']
    logger.info("Epoch {} LR {}".format(epoch, cur_lr))
    writer.add_scalar('train/lr', cur_lr, cur_step)

    model.train()

    for step, (X, y) in enumerate(train_loader):
        X, y = X.to(device, non_blocking=True), y.to(device, non_blocking=True)
        N = X.size(0)

        optimizer.zero_grad()
        logits, aux_logits = model(X)
        loss = criterion(logits, y)
        if config.aux_weight > 0.:
            loss += config.aux_weight * criterion(aux_logits, y)
        loss.backward()
        # gradient clipping
        nn.utils.clip_grad_norm_(model.parameters(), config.grad_clip)
        if config.dist_privacy:
            # privacy-preserving gradient clipping
            clipping_dispatcher(model.module.named_weights(),
                                config.max_weights_grad_norm, config.var_gamma,
                                device, logger)
        optimizer.step()

        prec1, prec5 = utils.accuracy(logits, y, topk=(1, 5))
        losses.update(loss.item(), N)
        top1.update(prec1.item(), N)
        top5.update(prec5.item(), N)

        if step % config.print_freq == 0 or step == len(train_loader) - 1:
            logger.info(
                "Train: [{:3d}/{}] Step {:03d}/{:03d} Loss {losses.avg:.3f} "
                "Prec@(1,5) ({top1.avg:.1%}, {top5.avg:.1%})".format(
                    epoch + 1,
                    config.epochs,
                    step,
                    len(train_loader) - 1,
                    losses=losses,
                    top1=top1,
                    top5=top5))

        writer.add_scalar('train/loss', loss.item(), cur_step)
        writer.add_scalar('train/top1', prec1.item(), cur_step)
        writer.add_scalar('train/top5', prec5.item(), cur_step)
        cur_step += 1

    logger.info("Train: [{:3d}/{}] Final Prec@1 {:.4%}".format(
        epoch + 1, config.epochs, top1.avg))
Example #4
    def train(self, model, epoch):
        objs = utils.AverageMeter()
        top1 = utils.AverageMeter()
        top5 = utils.AverageMeter()
        data_time = utils.AverageMeter()
        batch_time = utils.AverageMeter()
        model.train()
        start = time.time()

        prefetcher = data_prefetcher(self.train_data)
        input, target = prefetcher.next()
        step = 0
        while input is not None:
            data_t = time.time() - start
            self.scheduler.step()
            n = input.size(0)
            if step == 0:
                logging.info('epoch %d lr %e', epoch,
                             self.optimizer.param_groups[0]['lr'])
            self.optimizer.zero_grad()

            logits = model(input)
            if self.config.optim.label_smooth:
                loss = self.criterion(logits, target,
                                      self.config.optim.smooth_alpha)
            else:
                loss = self.criterion(logits, target)

            loss.backward()
            if self.config.optim.use_grad_clip:
                nn.utils.clip_grad_norm_(model.parameters(),
                                         self.config.optim.grad_clip)
            self.optimizer.step()

            prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))

            batch_t = time.time() - start
            start = time.time()

            objs.update(loss.item(), n)
            top1.update(prec1.item(), n)
            top5.update(prec5.item(), n)
            data_time.update(data_t)
            batch_time.update(batch_t)
            if step != 0 and step % self.report_freq == 0:
                logging.info(
                    'Train epoch %03d step %03d | loss %.4f  top1_acc %.2f  top5_acc %.2f | batch_time %.3f  data_time %.3f',
                    epoch, step, objs.avg, top1.avg, top5.avg, batch_time.avg,
                    data_time.avg)
            input, target = prefetcher.next()
            step += 1
        logging.info(
            'EPOCH%d Train_acc  top1 %.2f top5 %.2f batch_time %.3f data_time %.3f',
            epoch, top1.avg, top5.avg, batch_time.avg, data_time.avg)

        return top1.avg, top5.avg, objs.avg, batch_time.avg, data_time.avg
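
Examples #1, #4, #7 and #10 iterate with a `data_prefetcher` instead of looping over the DataLoader directly. The class is not shown here; the pattern is the CUDA-stream prefetcher from NVIDIA's Apex ImageNet example, roughly as sketched below (an assumption, not the projects' exact code):

import torch


class data_prefetcher:
    """Overlap the host-to-device copy of the next batch with compute on the current one."""

    def __init__(self, loader):
        self.loader = iter(loader)
        self.stream = torch.cuda.Stream()
        self.preload()

    def preload(self):
        try:
            self.next_input, self.next_target = next(self.loader)
        except StopIteration:
            self.next_input = None
            self.next_target = None
            return
        with torch.cuda.stream(self.stream):
            # asynchronous copy on a side stream
            self.next_input = self.next_input.cuda(non_blocking=True)
            self.next_target = self.next_target.cuda(non_blocking=True)

    def next(self):
        # make the default stream wait until the prefetch copy has finished
        torch.cuda.current_stream().wait_stream(self.stream)
        input, target = self.next_input, self.next_target
        self.preload()
        return input, target
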
Example #5
def train(logger, writer, device, config, train_loader, valid_loader, model, architect, w_optim, alpha_optim, lr, epoch):
    top1 = utils.AverageMeter()
    top5 = utils.AverageMeter()
    losses = utils.AverageMeter()

    cur_step = epoch*len(train_loader)
    writer.add_scalar('train/lr', lr, cur_step)

    model.train()

    for step, ((trn_X, trn_y), (val_X, val_y)) in enumerate(zip(train_loader, valid_loader)):
        trn_X, trn_y = trn_X.to(device, non_blocking=True), trn_y.to(device, non_blocking=True)
        val_X, val_y = val_X.to(device, non_blocking=True), val_y.to(device, non_blocking=True)
        N = trn_X.size(0)

        # phase 2. architect step (alpha)
        alpha_optim.zero_grad()
        architect.unrolled_backward(config, trn_X, trn_y, val_X, val_y, lr, w_optim)
        alpha_optim.step()

        # phase 1. child network step (w)
        w_optim.zero_grad()
        logits = model(trn_X)
        loss = model.module.criterion(logits, trn_y)
        loss.backward()
        # gradient clipping
        nn.utils.clip_grad_norm_(model.module.weights(), config.w_grad_clip)
        if config.dist_privacy:
            # privacy-preserving gradient clipping
            clipping_dispatcher(model.module.named_weights(),
                                config.max_weights_grad_norm,
                                config.var_gamma,
                                device,
                                logger
                                )
        w_optim.step()

        prec1, prec5 = utils.accuracy(logits, trn_y, topk=(1, 5))
        losses.update(loss.item(), N)
        top1.update(prec1.item(), N)
        top5.update(prec5.item(), N)

        if step % config.print_freq == 0 or step == len(train_loader)-1:
            logger.info(
                "Train: [{:2d}/{}] Step {:03d}/{:03d} Loss {losses.avg:.3f} "
                "Prec@(1,5) ({top1.avg:.1%}, {top5.avg:.1%})".format(
                    epoch+1, config.epochs, step, len(train_loader)-1, losses=losses,
                    top1=top1, top5=top5))

        writer.add_scalar('train/loss', loss.item(), cur_step)
        writer.add_scalar('train/top1', prec1.item(), cur_step)
        writer.add_scalar('train/top5', prec5.item(), cur_step)
        cur_step += 1

    logger.info("Train: [{:2d}/{}] Final Prec@1 {:.4%}".format(epoch+1, config.epochs, top1.avg))
Example #6
def test(test_loader, model, criterion, epoch, minimal_error):
    batch_time = utils.AverageMeter()
    data_time = utils.AverageMeter()
    losses = utils.AverageMeter()
    error = utils.AverageMeter()

    with torch.no_grad():
        model.eval()
        end = time.time()
        for batch_idx, input in enumerate(test_loader):
            data_time.update(time.time() - end)
            data, target = tuple(input[:len(input) - 1]), input[-1]
            if args.cuda:
                data, target = tuple(map(lambda x: x.cuda(),
                                         data)), target.cuda()

            output = model(*data)
            target = target.view(target.size(0), -1)
            loss = criterion(output, target)

            point_error = compute_error(output, target)
            losses.update(loss.item(), data[0].size(0))
            error.update(point_error, data[0].size(0))

            batch_time.update(time.time() - end)
            end = time.time()

            # print the intermediate results
            if batch_idx % args.print_freq == 0:
                logging.info(
                    'Time({}:{:.0f}), Test Epoch [{}]: [{}/{}]\t'
                    'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                    'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                    'Loss {loss.val:.3f} ({loss.avg:.3f})\t'
                    'Error {error.val:.3f} ({error.avg:.3f})'.format(
                        time.strftime('%Y-%m-%d %H:%M',
                                      time.localtime(time.time())),
                        time.time() % 60,
                        epoch,
                        batch_idx,
                        len(test_loader),
                        batch_time=batch_time,
                        data_time=data_time,
                        loss=losses,
                        error=error))

        writer.add_scalar('Test/Loss', losses.avg, epoch)
        writer.add_scalar('Test/Error', error.avg, epoch)
        logging.info(
            ' * Test Error {error.avg:.3f} Minimal_error {minimal_error:.3f}'.
            format(error=error, minimal_error=minimal_error))

    return error.avg
Example #7
    def train(self, model, epoch, optim_obj='Weights', search_stage=0):
        assert optim_obj in ['Weights', 'Arch']
        objs = utils.AverageMeter()
        top1 = utils.AverageMeter()
        top5 = utils.AverageMeter()
        sub_obj_avg = utils.AverageMeter()
        data_time = utils.AverageMeter()
        batch_time = utils.AverageMeter()
        model.train()

        start = time.time()
        if optim_obj == 'Weights':
            prefetcher = data_prefetcher(self.train_data)
        elif optim_obj == 'Arch':
            prefetcher = data_prefetcher(self.val_data)

        input, target = prefetcher.next()
        step = 0
        while input is not None:
            input, target = input.cuda(), target.cuda()
            data_t = time.time() - start
            n = input.size(0)
            if optim_obj == 'Weights':
                self.scheduler.step()
                if step == 0:
                    logging.info(
                        'epoch %d weight_lr %e', epoch,
                        self.search_optim.weight_optimizer.param_groups[0]
                        ['lr'])
                logits, loss, sub_obj = self.search_optim.weight_step(
                    input, target, model, search_stage)
            elif optim_obj == 'Arch':
                if step == 0:
                    logging.info(
                        'epoch %d arch_lr %e', epoch,
                        self.search_optim.arch_optimizer.param_groups[0]['lr'])
                logits, loss, sub_obj = self.search_optim.arch_step(
                    input, target, model, search_stage)

            prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))
            del logits, input, target

            batch_t = time.time() - start
            objs.update(loss, n)
            top1.update(prec1.item(), n)
            top5.update(prec5.item(), n)
            sub_obj_avg.update(sub_obj)
            data_time.update(data_t)
            batch_time.update(batch_t)

            if step != 0 and step % self.args.report_freq == 0:
                logging.info(
                    'Train%s epoch %03d step %03d | loss %.4f %s %.2f top1_acc %.2f top5_acc %.2f | batch_time %.3f data_time %.3f',
                    optim_obj, epoch, step, objs.avg, self.sub_obj_type,
                    sub_obj_avg.avg, top1.avg, top5.avg, batch_time.avg,
                    data_time.avg)
            start = time.time()
            step += 1
            input, target = prefetcher.next()
        return top1.avg, top5.avg, objs.avg, sub_obj_avg.avg, batch_time.avg
Example #8
def train(data_loader, model, criterion, optimizer, epoch, stage, logger,
          args):
    loss_avg = utils.AverageMeter()
    top1_res = utils.AverageMeter()
    top5_res = utils.AverageMeter()
    global_step = epoch * len(data_loader)
    model.train()

    logger.log("stage: {}".format(stage))

    for step, (images, labels) in enumerate(data_loader):
        images = images.cuda(non_blocking=True)
        labels = labels.cuda(non_blocking=True)
        num_samples = images.size(0)
        optimizer.zero_grad()

        logits = model(images)
        loss = criterion(logits, labels)
        prec1_res, prec5_res = utils.accuracy(logits.detach(),
                                              labels,
                                              topk=(1, 5))
        top1_res.update(prec1_res.item(), num_samples)
        top5_res.update(prec5_res.item(), num_samples)
        loss_avg.update(loss.detach().data.item(), num_samples)

        loss.backward()
        optimizer.step()

        epochs = args.baseline_epochs
        if step % 100 == 0 or step == len(data_loader) - 1:
            logger.log("Train, Epoch: [{:3d}/{}], Step: [{:3d}/{}], " \
                        "Loss: {:.4f}, Prec@(res1, res5):  {:.4%}, {:.4%}".format(
                            epoch, epochs, step, len(data_loader),
                            loss_avg.avg,   top1_res.avg,  top5_res.avg))

        global_step += 1
    logger.log("Train, Epoch: [{:3d}/{}], Step: [{:3d}/{}], " \
                        "Loss: {:.4f}, Prec@(res1, res5):  {:.4%}, {:.4%}".format(
                            epoch, epochs, step, len(data_loader),
                            loss_avg.avg, top1_res.avg,  top5_res.avg))
Example #9
def run_epoch(epoch, dataloader, model, criterion, args, optimizer=None, is_train=True):
    batch_time = utils.AverageMeter('Time', ':6.3f')
    losses = utils.AverageMeter('Loss', ':.4e')
    progress = utils.ProgressMeter(len(dataloader), batch_time, losses, prefix="Epoch: [{}]".format(epoch))
    
    time_shift = args.time_shift
    end = time.time()

    for i, data in enumerate(dataloader):
        utt_list, feats, _, _, _, _, _ = data
        mask_size = feats.size(1)-time_shift
        if args.net_type == 'tsfm':
            uni_mask = (feats[:,:-time_shift,:] != 0)[:,:,0].unsqueeze(-2).byte() & subsequent_mask(mask_size)
        
        if args.use_gpu:
            feats = feats.cuda()
            if args.net_type == 'tsfm':
                uni_mask = uni_mask.cuda()
        
        if args.net_type == 'tsfm':
            outputs = model(feats[:, :-time_shift, :], uni_mask)
        if args.net_type == 'rnn':
            outputs = model(feats[:, :-time_shift, :])
        
        mask = feats[:,:-time_shift,:] != 0
        loss = criterion(outputs.masked_select(mask), feats[:, time_shift:, :].masked_select(mask))
        if is_train:
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        
        losses.update(loss.item(), feats.size(0))
        batch_time.update(time.time() - end)

        if i % args.print_freq == 0:
            progress.print(i)
    return losses.avg
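
Examples #9 and #13 also call `utils.ProgressMeter(...)` and `progress.print(i)` to group several meters into one log line. A minimal sketch consistent with that usage (again an assumption, since the helper is not part of these snippets):

class ProgressMeter:
    def __init__(self, num_batches, *meters, prefix=""):
        self.batch_fmtstr = self._get_batch_fmtstr(num_batches)
        self.meters = meters
        self.prefix = prefix

    def print(self, batch):
        # e.g. "Epoch: [3][ 40/391]  Time 0.123 (0.130)  Loss 1.2e+00 (1.3e+00)"
        entries = [self.prefix + self.batch_fmtstr.format(batch)]
        entries += [str(meter) for meter in self.meters]
        print('\t'.join(entries))

    @staticmethod
    def _get_batch_fmtstr(num_batches):
        num_digits = len(str(num_batches))
        fmt = '{:' + str(num_digits) + 'd}'
        return '[' + fmt + '/' + fmt.format(num_batches) + ']'
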
Example #10
    def infer(self, model, epoch):
        objs = utils.AverageMeter()
        top1 = utils.AverageMeter()
        top5 = utils.AverageMeter()
        sub_obj_avg = utils.AverageMeter()
        data_time = utils.AverageMeter()
        batch_time = utils.AverageMeter()

        model.train()  # don't use running_mean and running_var during search
        start = time.time()
        prefetcher = data_prefetcher(self.val_data)
        input, target = prefetcher.next()
        step = 0
        while input is not None:
            step += 1
            data_t = time.time() - start
            n = input.size(0)

            logits, loss, sub_obj = self.search_optim.valid_step(
                input, target, model)
            prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))

            batch_t = time.time() - start
            objs.update(loss, n)
            top1.update(prec1.item(), n)
            top5.update(prec5.item(), n)
            sub_obj_avg.update(sub_obj)
            data_time.update(data_t)
            batch_time.update(batch_t)

            if step % self.args.report_freq == 0:
                logging.info(
                    'Val epoch %03d step %03d | loss %.4f %s %.2f top1_acc %.2f top5_acc %.2f | batch_time %.3f data_time %.3f',
                    epoch, step, objs.avg, self.sub_obj_type, sub_obj_avg.avg,
                    top1.avg, top5.avg, batch_time.avg, data_time.avg)
            start = time.time()
            input, target = prefetcher.next()

        return top1.avg, top5.avg, objs.avg, sub_obj_avg.avg, batch_time.avg
Example #11
def train(train_loader, encoder, decoder, criterion, encoder_optim,
          decoder_optim, epoch, opt, num):
    """
    train for one epoch on the training set
    """
    batch_time = utils.AverageMeter()
    losses = utils.AverageMeter()

    # training mode
    encoder.train()
    decoder.train()

    end = time.time()

    global sim_rec
    global loss_rec

    teaching_ratio = opt.teaching_ratio

    for i, (vfeat, afeat) in enumerate(train_loader):

        bs = vfeat.size()[0]
        seq_length = 120

        # reverse vfeat along the time dimension; clone the slices so the
        # swap is not corrupted by in-place view aliasing
        for j in range(59):
            vfeat[:, j, :], vfeat[:, 119 - j, :] = \
                vfeat[:, 119 - j, :].clone(), vfeat[:, j, :].clone()
        # do PCA
        vfeat = pca_tensor(vfeat, pr=False, dim=1024, feat='vfeat')
        afeat = pca_tensor(afeat, pr=False, dim=128, feat='afeat')

        vfeat = Variable(vfeat)
        target = Variable(afeat)

        encoder_optim.zero_grad()
        decoder_optim.zero_grad()

        loss = 0

        # if you have gpu, then shift data to GPU
        if opt.cuda:
            vfeat = vfeat.cuda()
            target = target.cuda()

        # use video features to generate encoder_output and encoder_hidden (to be the initial hidden for decoder)
        encoder_hidden = encoder.init_hidden(batch_size=bs)
        encoder_output, encoder_hidden = encoder(vfeat, encoder_hidden)

        # decoder
        decoder_hidden = encoder_hidden
        decoder_input = encoder_output[:, 119, :].clone()  # bs * 128
        if opt.cuda:
            decoder_input = decoder_input.cuda()
        decoder_context = torch.mean(encoder_output, dim=1)  # bs * 128

        teaching = random.random() < teaching_ratio

        if teaching:
            for seq in range(seq_length):
                audio_output, decoder_context, decoder_hidden, attn_weights = decoder(
                    decoder_input,
                    decoder_context,
                    decoder_hidden,
                    encoder_output,
                    seq=seq)
                loss += criterion(audio_output, target[:, seq, :])
                decoder_input = target[:, seq, :]
        else:
            for seq in range(seq_length):
                audio_output, decoder_context, decoder_hidden, attn_weights = decoder(
                    decoder_input,
                    decoder_context,
                    decoder_hidden,
                    encoder_output,
                    seq=seq)
                loss += criterion(audio_output, target[:, seq, :])
                decoder_input = audio_output

        loss = loss / seq_length
        loss_rec.append(loss.item())

        losses.update(loss.item(), vfeat.size(0))
        loss.backward()

        torch.nn.utils.clip_grad_norm_(encoder.parameters(), opt.gradient_clip)
        torch.nn.utils.clip_grad_norm_(decoder.parameters(), opt.gradient_clip)

        encoder_optim.step()
        decoder_optim.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        if i % opt.print_freq == 0:
            log_str = 'No.{} Epoch: [{}][{}/{}]\t Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t Loss {loss.val:.4f} ({loss.avg:.4f})'.format(
                num,
                epoch,
                i,
                len(train_loader),
                batch_time=batch_time,
                loss=losses)
            print(log_str)
Example #12
File: run.py  Project: kc-ml2/darts
def train(train_loader, valid_loader, model, arch, w_optim, alpha_optim, lr,
          epoch):
    top1 = utils.AverageMeter()
    top5 = utils.AverageMeter()
    losses = utils.AverageMeter()

    cur_step = epoch * len(train_loader)
    tb_writer.add_scalar('train/lr', lr, cur_step)

    model.train()

    for step, ((train_X, train_y),
               (valid_X, valid_y)) in enumerate(zip(train_loader,
                                                    valid_loader)):
        train_X, train_y = train_X.to(device, non_blocking=True), train_y.to(
            device, non_blocking=True)
        valid_X, valid_y = valid_X.to(device, non_blocking=True), valid_y.to(
            device, non_blocking=True)
        N = train_X.size(0)

        # arch step (alpha training)
        alpha_optim.zero_grad()
        arch.unrolled_backward(train_X, train_y, valid_X, valid_y, lr, w_optim)
        alpha_optim.step()

        # child network step (w)
        w_optim.zero_grad()
        logits = model(train_X)
        loss = model.criterion(logits, train_y)
        loss.backward()

        # gradient clipping
        nn.utils.clip_grad_norm_(model.weights(), config.w_grad_clip)
        w_optim.step()

        prec1, prec5 = utils.accuracy(logits, train_y, topk=(1, 5))
        losses.update(loss.item(), N)
        top1.update(prec1.item(), N)
        top5.update(prec5.item(), N)

        if step % config.print_freq == 0 or step == len(train_loader) - 1:
            print("\r", end="", flush=True)
            logger.info(
                "Train: [{:2d}/{}] Step {:03d}/{:03d} Loss {losses.avg:.3f} "
                "Prec@(1,5) ({top1.avg:.1%}, {top5.avg:.1%})".format(
                    epoch + 1,
                    config.epochs,
                    step,
                    len(train_loader) - 1,
                    losses=losses,
                    top1=top1,
                    top5=top5))
        else:
            print(
                "\rTrain: [{:2d}/{}] Step {:03d}/{:03d} Loss {losses.avg:.3f} "
                "Prec@(1,5) ({top1.avg:.1%}, {top5.avg:.1%})".format(
                    epoch + 1,
                    config.epochs,
                    step,
                    len(train_loader) - 1,
                    losses=losses,
                    top1=top1,
                    top5=top5),
                end="",
                flush=True)

        tb_writer.add_scalar('train/loss', loss.item(), cur_step)
        tb_writer.add_scalar('train/top1', prec1.item(), cur_step)
        tb_writer.add_scalar('train/top5', prec5.item(), cur_step)

        if step % (config.print_freq // 5) == 0 or step == len(
                train_loader) - 1:  # not too much logging
            for i, tensor in enumerate(model.alpha_normal):
                for j, lsn in enumerate(F.softmax(tensor, dim=-1)):
                    tb_writer.add_scalars(
                        'alpha_normal/%d ~~ %d' % ((j - 2), i), {
                            'max_pl3': lsn[0],
                            'avg_pl3': lsn[1],
                            'skip_cn': lsn[2],
                            'sep_conv3': lsn[3],
                            'sep_conv5': lsn[4],
                            'dil_conv3': lsn[5],
                            'dil_conv5': lsn[6],
                            'none': lsn[7]
                        }, cur_step)
            for i, tensor in enumerate(model.alpha_reduce):
                for j, lsr in enumerate(F.softmax(tensor, dim=-1)):
                    tb_writer.add_scalars(
                        'alpha_reduce/%d ~~ %d' % ((j - 2), i), {
                            'max_pl3': lsr[0],
                            'avg_pl3': lsr[1],
                            'skip_cn': lsr[2],
                            'sep_conv3': lsr[3],
                            'sep_conv5': lsr[4],
                            'dil_conv3': lsr[5],
                            'dil_conv5': lsr[6],
                            'none': lsr[7]
                        }, cur_step)

        cur_step += 1

    logger.info("Train: [{:2d}/{}] Final Prec@1 {:.4%}".format(
        epoch + 1, config.epochs, top1.avg))
Example #13
def run_epoch(epoch,
              dataloader,
              model,
              criterion_ctc,
              criterion_phrase,
              args,
              optimizer=None,
              is_train=True):
    batch_time = utils.AverageMeter('Time', ':6.3f')
    losses = utils.AverageMeter('Loss', ':.4e')
    ctc_losses = utils.AverageMeter('CtcLoss', ":.4e")
    phrase_losses = utils.AverageMeter('PIDLoss', ":.4e")
    phone_wers = utils.AverageMeter('Phone_WER', ':.4f')
    pid_accs = utils.AverageMeter('PID_Acc', ':.4f')
    progress = utils.ProgressMeter(len(dataloader),
                                   batch_time,
                                   losses,
                                   ctc_losses,
                                   phone_wers,
                                   phrase_losses,
                                   pid_accs,
                                   prefix="Epoch: [{}]".format(epoch))

    end = time.time()

    for i, data in enumerate(dataloader):
        utt_list, feats, labels, feat_sizes, label_sizes, phrase_label, _ = data
        batch_size, mask_size, _ = feats.size()
        feat_sizes = feat_sizes // 2
        #uni_mask = (feats != 0)[:,:,0].unsqueeze(-2).byte() & subsequent_mask(mask_size)

        if args.use_gpu:
            feats = feats.cuda()
            #uni_mask = uni_mask.cuda()
            labels = labels.cuda()
            feat_sizes = feat_sizes.cuda()
            label_sizes = label_sizes.cuda()
            phrase_label = phrase_label.cuda()

        ctc_out, phrase_out, _ = model(feats, feat_sizes)

        ctc_loss = criterion_ctc(ctc_out.transpose(0, 1), labels, feat_sizes,
                                 label_sizes)
        phrase_loss = criterion_phrase(phrase_out, phrase_label.view(-1))
        loss = ctc_loss + args.phrase_lambda * phrase_loss
        batch_errs, batch_tokens = model.compute_wer(
            torch.max(ctc_out, dim=-1)[1].cpu().numpy(),
            feat_sizes.cpu().numpy(),
            labels.cpu().numpy(),
            label_sizes.cpu().numpy())

        phone_wers.update(batch_errs / batch_tokens, batch_tokens)
        correct = torch.sum(
            torch.argmax(phrase_out, dim=-1).view(-1) == phrase_label.view(
                -1)).item()
        batch_num = torch.sum((phrase_label != -1)).item() + 0.0000001
        pid_accs.update(correct / batch_num, batch_num)

        if is_train:
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        losses.update(loss.item(), batch_size)
        ctc_losses.update(ctc_loss.item(), batch_size)
        phrase_losses.update(phrase_loss.item(), batch_size)
        batch_time.update(time.time() - end)

        if i % args.print_freq == 0:
            progress.print(i)
    return losses.avg, phone_wers.avg
Example #14
def train(train_loader, train_mpii_loader, model, criterion, optimizer, epoch):
    batch_time = utils.AverageMeter()
    data_time = utils.AverageMeter()
    losses = utils.AverageMeter()
    mpii_losses = utils.AverageMeter()
    point_errors = utils.AverageMeter()
    angle_errors = utils.AverageMeter()

    model.train()
    end = time.time()
    #TODO: add MPIIGaze dataset iteration
    train_mpii_iterator = iter(train_mpii_loader)
    for batch_idx, input in enumerate(train_loader):
        data_time.update(time.time() - end)
        data, target = tuple(input[:len(input) - 1]), input[-1]
        if args.cuda:
            data, target = tuple(map(lambda x: x.cuda(), data)), target.cuda()

        output = model(*data)
        target = target.view(target.size(0), -1)
        loss = criterion(output, target)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Measure point error and record loss
        point_error = compute_error(output, target)
        losses.update(loss.item(), data[0].size(0))
        point_errors.update(point_error, data[0].size(0))

        batch_time.update(time.time() - end)
        end = time.time()

        writer.add_scalar('Train/Loss', losses.avg,
                          batch_idx + len(train_loader) * epoch)
        writer.add_scalar('Train/Error', point_errors.avg,
                          batch_idx + len(train_loader) * epoch)

        # print the intermediate results
        if batch_idx % args.print_freq == 0:
            logging.info('Time({}:{:.0f}), Train Epoch [{}]: [{}/{}]\t'
                         'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                         'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                         'Loss {loss.val:.3f} ({loss.avg:.3f})\t'
                         'Error {error.val:.3f} ({error.avg:.3f})'.format(
                             time.strftime('%Y-%m-%d %H:%M',
                                           time.localtime(time.time())),
                             time.time() % 60,
                             epoch,
                             batch_idx,
                             len(train_loader),
                             batch_time=batch_time,
                             data_time=data_time,
                             loss=losses,
                             error=point_errors))

        #TODO: MPIIGaze dataset iteration
        (mpii_input, mpii_target), train_mpii_iterator = utils.infinite_get(
            train_mpii_iterator, train_mpii_loader)
        if args.cuda:
            mpii_input, mpii_target = tuple(map(
                lambda x: x.cuda(), mpii_input)), mpii_target.cuda()

        mpii_output = model(*mpii_input)
        mpii_target = mpii_target.view(mpii_target.size(0), -1)

        #TODO: add coplanar loss
        mpii_loss = criterion(mpii_output, mpii_target) + 0.000001 * criterion(
            distance, mpii_target[2])

        optimizer.zero_grad()
        mpii_loss.backward()
        optimizer.step()

        # Measure angle error and record loss
        angle_error = compute_angle_error(mpii_output, mpii_target)
        mpii_losses.update(mpii_loss.item(), data[0].size(0))
        angle_errors.update(angle_error, mpii_input[0].size(0))

        writer.add_scalar('Train/MPII_Loss', mpii_losses.avg,
                          batch_idx + len(train_loader) * epoch)
        writer.add_scalar('Train/Angle_Error', angle_errors.avg,
                          batch_idx + len(train_loader) * epoch)

        # print the intermediate results
        if batch_idx % args.print_freq == 0:
            logging.info('Time({}:{:.0f}), Train Epoch [{}]: [{}/{}]\t'
                         'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                         'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                         'Loss {loss.val:.3f} ({loss.avg:.3f})\t'
                         'Error {error.val:.3f} ({error.avg:.3f})'.format(
                             time.strftime('%Y-%m-%d %H:%M',
                                           time.localtime(time.time())),
                             time.time() % 60,
                             epoch,
                             batch_idx,
                             len(train_loader),
                             batch_time=batch_time,
                             data_time=data_time,
                             loss=losses,
                             error=angle_errors))
Example #15
def train(train_loader, model, criterion, optimizer, epoch, opt):
    """
    train for one epoch on the training set
    """
    batch_time = utils.AverageMeter()
    losses = utils.AverageMeter()

    # training mode
    model.train()
    right = 0
    total = 0
    end = time.time()
    for i, (vfeat, afeat) in enumerate(train_loader):
        # shuffling the index orders
        bz = vfeat.size()[0]
        orders = np.arange(bz).astype('int32')
        shuffle_orders = orders.copy()
        np.random.shuffle(shuffle_orders)
        if (shuffle_orders == orders).all():
            np.random.shuffle(shuffle_orders)
        label1 = (orders == shuffle_orders + 0).astype('float32')
        shuffle_orders = torch.from_numpy(shuffle_orders).long()

        # more negative data
        augment_order = torch.from_numpy(np.random.permutation(11)).long()
        # augment_order=torch.from_numpy(np.random.permutation(120)).long()
        vfeat3_source = vfeat[shuffle_orders]
        vfeat3 = vfeat3_source[:, augment_order, :]
        afeat3_source = afeat[shuffle_orders]
        afeat3 = afeat3_source[:, augment_order, :]

        # creating a new data with the shuffled indices
        afeat2 = afeat[shuffle_orders].clone()

        # concat the vfeat and afeat respectively
        afeat0 = torch.cat((afeat, afeat2, afeat3), 0)
        vfeat0 = torch.cat((vfeat, vfeat, vfeat3), 0)

        # generating the labels
        # 1. the labels for the shuffled feats
        target1 = torch.from_numpy(label1)

        # 2. the labels for the original feats
        label2 = label1.copy()
        label2[:] = 1
        target2 = torch.from_numpy(label2)

        # concat the labels together
        target = torch.cat((target2, target1, torch.zeros(target1.size(0))), 0)
        target = 1 - target

        # transpose the feats
        vfeat0 = vfeat0.transpose(2, 1)
        afeat0 = afeat0.transpose(2, 1)

        # put the data into Variable
        vfeat_var = Variable(vfeat0)
        afeat_var = Variable(afeat0)
        target_var = Variable(target)

        # if you have gpu, then shift data to GPU
        if opt.cuda:
            vfeat_var = vfeat_var.cuda()
            afeat_var = afeat_var.cuda()
            target_var = target_var.cuda()

        # forward, backward optimize
        sim = model(vfeat_var, afeat_var)  # infer similarity
        loss = criterion(sim, target_var)  # compute contrastive loss

        res = (sim > 0.5).float()
        right += torch.sum(res == target_var).item()
        ##############################
        # update loss in the loss meter
        ##############################
        losses.update(loss.item(), vfeat0.size(0))

        ##############################
        # compute gradient and do sgd
        ##############################
        optimizer.zero_grad()
        loss.backward()

        ##############################
        # gradient clip stuff
        ##############################
        #utils.clip_gradient(optimizer, opt.gradient_clip)

        # update parameters
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        if i % opt.print_freq == 0:
            log_str = 'Epoch: [{0}][{1}/{2}]\t Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t Loss {loss.val:.4f} ({loss.avg:.4f})'.format(
                epoch,
                i,
                len(train_loader),
                batch_time=batch_time,
                loss=losses)
            print(log_str)
    print(right / 1270 / 3, 1270 * 3 - right)
Example #16
def main():
    if not torch.cuda.is_available():
        print('no gpu device available')
        sys.exit(1)
    if args.random_seed:
        args.seed = np.random.randint(0, 1000, 1)
    # reproducible: the initialization stays the same when the code is rerun.
    #you should ensure that all other libraries your code relies on and which use random numbers also use a fixed seed.
    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    criterion = torch.nn.BCEWithLogitsLoss().cuda()
    ## in_channels is the feature dimension!
    model = Network(args.init_channels,
                    args.classes,
                    args.num_cells,
                    criterion,
                    args.n_steps,
                    in_channels=args.in_channels).cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    num_edges = model._steps * 2
    post_train = 5
    args.epochs = args.warmup_dec_epoch + args.decision_freq * (
        num_edges - 1) + post_train + 1
    logging.info("total epochs: %d", args.epochs)

    optimizer = torch.optim.SGD(model.parameters(),
                                args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs), eta_min=args.learning_rate_min)

    architect = Architect(model, args)

    normal_selected_idxs = torch.tensor(len(model.alphas_normal) * [-1],
                                        requires_grad=False,
                                        dtype=torch.int).cuda()
    normal_candidate_flags = torch.tensor(len(model.alphas_normal) * [True],
                                          requires_grad=False,
                                          dtype=torch.bool).cuda()
    logging.info('normal_selected_idxs: {}'.format(normal_selected_idxs))
    logging.info('normal_candidate_flags: {}'.format(normal_candidate_flags))
    model.normal_selected_idxs = normal_selected_idxs
    model.normal_candidate_flags = normal_candidate_flags

    print(F.softmax(torch.stack(model.alphas_normal, dim=0), dim=-1).detach())

    normal_probs_history = []
    train_losses, valid_losses = utils.AverageMeter(), utils.AverageMeter()
    for epoch in range(args.epochs):
        lr = scheduler.get_lr()[0]
        logging.info('epoch %d lr %e', epoch, lr)

        # training
        train_acc, train_loss = train(model, architect, criterion, optimizer,
                                      lr)
        print("!!!!!!!!!!!!!!!!train_loss:", train_loss)
        valid_acc, valid_losses = infer(model, criterion, valid_losses)
        logging.info('train_acc %f\tvalid_acc %f', train_acc, valid_acc)

        # make edge decisions
        saved_memory_normal, model.normal_selected_idxs, \
        model.normal_candidate_flags = edge_decision('normal',
                                                     model.alphas_normal,
                                                     model.normal_selected_idxs,
                                                     model.normal_candidate_flags,
                                                     normal_probs_history,
                                                     epoch,
                                                     model,
                                                     args)

        writer.add_scalar('stats/train_acc', train_acc, epoch)
        writer.add_scalar('stats/valid_acc', valid_acc, epoch)
        utils.save(model, os.path.join(args.save, 'search_weights.pt'))
        scheduler.step()

    logging.info("#" * 30 + " Done " + "#" * 30)
    logging.info('genotype = %s', model.get_genotype())
Example #17
def train(train_loader, model, criterion, optimizer, epoch, opt, eponum):
    """
    train for one epoch on the training set
    """
    batch_time = utils.AverageMeter()
    losses = utils.AverageMeter()

    # training mode
    model.train()
    model.float()
    loss_rec = []
    end = time.time()
    for i, (data, label) in enumerate(train_loader):
        #model.train()
        #model.float()
        # shuffling the index orders
        bz = label.size()[0]
        orders = np.arange(bz).astype('int32')
        shuffle_orders = orders.copy()
        np.random.shuffle(shuffle_orders)

        # creating a new data with the shuffled indices
        data = data[torch.from_numpy(shuffle_orders).long()].clone()
        label = label[torch.from_numpy(shuffle_orders).long()].clone()

        # concat the vfeat and afeat respectively

        # generating the labels
        # 1. the labels for the shuffled feats

        # 2. the labels for the original feats
        #label = label.astype(np.int64)
        #label = torch.from_numpy(label)
        label = label.view(label.size(0))
        #one_hot = torch.zeros(np.shape(label)[0], 10).scatter_(1, label, 1)
        #one_hot = one_hot.type(torch.LongTensor)
        one_hot = torch.LongTensor(label)
        # transpose the feats
        # vfeat0 = vfeat0.transpose(2, 1)
        # afeat0 = afeat0.transpose(2, 1)

        # put the data into Variable
        data_var = Variable(data)
        target_var = Variable(one_hot)

        # if you have gpu, then shift data to GPU
        if opt.cuda:
            data_var = data_var.cuda()
            target_var = target_var.cuda()

        # forward, backward optimize
        sim = model(data_var)  # infer similarity
        #print(sim)
        #print(target_var)
        loss = criterion(sim, target_var)  # compute the loss

        loss_rec.append(loss.item())

        ##############################
        # update loss in the loss meter
        ##############################
        losses.update(loss.item(), label.size(0))

        ##############################
        # compute gradient and do sgd
        ##############################
        optimizer.zero_grad()
        loss.backward()

        ##############################
        # gradient clip stuff
        ##############################
        # utils.clip_gradient(optimizer, opt.gradient_clip)

        # update parameters
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        if i % opt.print_freq == 0:
            log_str = 'Fold:[{3}] Epoch: [{0}][{1}/{2}]\t Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t Loss {loss.val:.4f} ({loss.avg:.4f})'.format(
                eponum,
                i,
                len(train_loader),
                epoch,
                batch_time=batch_time,
                loss=losses)
            print(log_str)
Example #18
def train(data_loader, model, criterion, optimizer_t, optimizer_s, epoch,
          stage, logger, args):

    [loss_avg, mse_avg, top1_cnn, top5_cnn, top1_res,
     top5_res] = [utils.AverageMeter() for _ in range(6)]
    global_step = epoch * len(data_loader)
    model.train()
    logger.log("stage: {}".format(stage))
    m = Cosine(min_v=args.dc, max_v=1.0, epoch=epoch, epoch_max=60)
    #m = 1.0
    model.module.reset_margin()
    for step, (images, labels) in enumerate(data_loader):
        images = images.cuda(non_blocking=True)
        labels = labels.cuda(non_blocking=True)
        num_samples = images.size(0)
        if optimizer_t is not None:
            optimizer_t.zero_grad()
        if optimizer_s is not None:
            optimizer_s.zero_grad()

        if "TA" in stage:

            ###train teacher#####################
            model.module.teacher.eval()
            logits_teacher, teacher_feas = model(images,
                                                 stage='RES_TA',
                                                 epoch=epoch)
            #logits_teacher, _ = model(images, stage='RES_TA', epoch=epoch)
            model.module.teacher.eval()
            #####################################

            logits_student, _, loss_dis = model(images,
                                                stage=stage,
                                                epoch=epoch,
                                                teacher_feas=teacher_feas[-1])
            loss = 0

            loss_last = criterion(logits_student[-1], labels)
            loss_avg.update(loss_last.detach().item(), num_samples)
            loss += loss_last
            loss += loss_dis[-1].mean() * args.dis_weight
            #10^-3 for 32x32 image
            #10^-4 for 224x224 scale classification task
            #10^-5 for detection and segmentation task
            if isinstance(logits_student, list):
                prec1_cnn, prec5_cnn = utils.accuracy(
                    logits_student[-1].detach(), labels, topk=(1, 5))
            else:
                prec1_cnn, prec5_cnn = utils.accuracy(logits_student.detach(),
                                                      labels,
                                                      topk=(1, 5))

            prec1_cnn, prec5_cnn = utils.accuracy(logits_student[-1].detach(),
                                                  labels,
                                                  topk=(1, 5))
            prec1_res, prec5_res = utils.accuracy(logits_teacher.detach(),
                                                  labels,
                                                  topk=(1, 5))
            ### teacher is only updated by its own loss

            loss.backward()
            optimizer_s.step()
            top1_cnn.update(prec1_cnn.item(), num_samples)
            top5_cnn.update(prec5_cnn.item(), num_samples)
            top1_res.update(prec1_res.item(), num_samples)
            top5_res.update(prec5_res.item(), num_samples)

        elif "KD" in stage:

            ###train teacher#####################
            model.module.teacher.eval()
            logits_teacher, teacher_feas = model(images,
                                                 stage='RES_TA',
                                                 epoch=epoch)
            #logits_teacher, _ = model(images, stage='RES_TA', epoch=epoch)
            model.module.teacher.eval()
            #####################################

            logits_student, _, loss_dis = model(images,
                                                stage=stage,
                                                epoch=epoch,
                                                teacher_feas=teacher_feas[-1])
            loss = 0
            loss += criterion(logits_student[-1], labels)
            loss_avg.update(loss.detach().item(), num_samples)
            if loss_dis is not None:
                for loss_d in loss_dis[:-1]:
                    loss += loss_d.mean() * m * args.dis_weight
                mse_avg.update(loss_dis[-1].detach().mean().item(),
                               num_samples)
                loss += loss_dis[-1].mean() * args.dis_weight

            #10^-3 for 32x32 image
            #10^-4 for 224x224 scale classification task
            #10^-5 for detection and segmentation task
            if isinstance(logits_student, list):
                prec1_cnn, prec5_cnn = utils.accuracy(
                    logits_student[-1].detach(), labels, topk=(1, 5))
            else:
                prec1_cnn, prec5_cnn = utils.accuracy(logits_student.detach(),
                                                      labels,
                                                      topk=(1, 5))

            prec1_cnn, prec5_cnn = utils.accuracy(logits_student[-1].detach(),
                                                  labels,
                                                  topk=(1, 5))
            prec1_res, prec5_res = utils.accuracy(logits_teacher.detach(),
                                                  labels,
                                                  topk=(1, 5))
            ### teacher is only updated by its own loss

            loss.backward()
            optimizer_s.step()
            top1_cnn.update(prec1_cnn.item(), num_samples)
            top5_cnn.update(prec5_cnn.item(), num_samples)
            top1_res.update(prec1_res.item(), num_samples)
            top5_res.update(prec5_res.item(), num_samples)

        elif "KL" in stage:

            ###train teacher#####################
            model.module.teacher.eval()
            logits_teacher = model(images, stage='RES_NMT', epoch=epoch)
            #logits_teacher, _ = model(images, stage='RES_TA', epoch=epoch)
            model.module.teacher.eval()
            #####################################

            logits_student = model(images, stage="CNN_NMT", epoch=epoch)
            loss = loss_KD_fn(criterion,
                              logits_student,
                              logits_teacher,
                              targets=labels,
                              alpha=args.alpha,
                              temperature=args.temperature)
            #10^-3 for 32x32 image
            #10^-4 for 224x224 scale classification task
            #10^-5 for detection and segmentation task
            if isinstance(logits_student, list):
                prec1_cnn, prec5_cnn = utils.accuracy(logits_student.detach(),
                                                      labels,
                                                      topk=(1, 5))
            else:
                prec1_cnn, prec5_cnn = utils.accuracy(logits_student.detach(),
                                                      labels,
                                                      topk=(1, 5))

            prec1_cnn, prec5_cnn = utils.accuracy(logits_student.detach(),
                                                  labels,
                                                  topk=(1, 5))
            prec1_res, prec5_res = utils.accuracy(logits_teacher.detach(),
                                                  labels,
                                                  topk=(1, 5))
            ### teacher is only updated by its own loss

            loss.backward()
            optimizer_s.step()
            top1_cnn.update(prec1_cnn.item(), num_samples)
            top5_cnn.update(prec5_cnn.item(), num_samples)
            top1_res.update(prec1_res.item(), num_samples)
            top5_res.update(prec5_res.item(), num_samples)

        elif "JOINT" in stage:
            ## teacher and student are jointly trained from scratch

            ###train teacher#####################

            model.module.teacher.train()
            optimizer_t.zero_grad()
            logits_teacher, teacher_feas = model(images,
                                                 stage='RES_TA',
                                                 epoch=epoch)
            #logits_teacher, _ = model(images, stage='RES_TA', epoch=epoch)
            loss_teacher = criterion(logits_teacher, labels)
            loss_teacher.backward()
            optimizer_t.step()
            model.module.teacher.eval()
            #####################################

            logits_student, _, loss_dis = model(images,
                                                stage=stage,
                                                epoch=epoch,
                                                teacher_feas=teacher_feas[-1])
            loss = 0
            xishu = 1.0 / 4.
            for logit_student in logits_student[:-1]:
                KD_TRAIN = False
                if KD_TRAIN:
                    loss += loss_KD_fn(
                        criterion,
                        logit_student,
                        logits_teacher,
                        targets=labels,
                        alpha=args.alpha,
                        temperature=args.temperature) * m * xishu
                else:
                    loss += criterion(logit_student, labels) * m * xishu
            loss_last = criterion(logits_student[-1], labels) * xishu
            loss_avg.update(loss_last.detach().item(), num_samples)
            loss += loss_last

            if loss_dis is not None:
                for loss_d in loss_dis[:-1]:
                    loss += loss_d.mean() * m * xishu * args.dis_weight
                mse_avg.update(loss_dis[-1].detach().mean().item(),
                               num_samples)
                loss += loss_dis[-1].mean() * args.dis_weight * xishu

            #10^-3 for 32x32 image
            #10^-4 for 224x224 scale classification task
            #10^-5 for detection and segmentation task
            if isinstance(logits_student, list):
                prec1_cnn, prec5_cnn = utils.accuracy(
                    logits_student[-1].detach(), labels, topk=(1, 5))
            else:
                prec1_cnn, prec5_cnn = utils.accuracy(logits_student.detach(),
                                                      labels,
                                                      topk=(1, 5))

            prec1_cnn, prec5_cnn = utils.accuracy(logits_student[-1].detach(),
                                                  labels,
                                                  topk=(1, 5))
            prec1_res, prec5_res = utils.accuracy(logits_teacher.detach(),
                                                  labels,
                                                  topk=(1, 5))
            ### teacher is only updated by its own loss
            loss.backward()
            #for n, v in model.named_parameters():
            #    print(n)
            #    print(v.grad.mean())
            #pdb.set_trace()
            optimizer_s.step()
            top1_cnn.update(prec1_cnn.item(), num_samples)
            top5_cnn.update(prec5_cnn.item(), num_samples)
            top1_res.update(prec1_res.item(), num_samples)
            top5_res.update(prec5_res.item(), num_samples)

        elif "RES_NMT" in stage:
            logits = model(images, stage='RES_NMT')
            loss = criterion(logits, labels)
            prec1_res, prec5_res = utils.accuracy(logits.detach(),
                                                  labels,
                                                  topk=(1, 5))
            top1_res.update(prec1_res.item(), num_samples)
            top5_res.update(prec5_res.item(), num_samples)
            loss_avg.update(loss.detach().data.item(), num_samples)
            loss.backward()
            optimizer_t.step()

        elif "CNN_NMT" in stage:
            logits = model(images, stage=stage)
            loss = criterion(logits, labels)
            prec1_cnn, prec5_cnn = utils.accuracy(logits.detach(),
                                                  labels,
                                                  topk=(1, 5))
            top1_cnn.update(prec1_cnn.item(), num_samples)
            top5_cnn.update(prec5_cnn.item(), num_samples)
            loss_avg.update(loss.detach().data.item(), num_samples)
            loss.backward()
            optimizer_s.step()

        elif "RES_KD" in stage:
            logit_student, logits_teacher = model(images, stage=stage)
            loss = loss_KD_fn(criterion,
                              logit_student,
                              logits_teacher,
                              targets=labels,
                              alpha=args.alpha,
                              temperature=args.temperature)
            prec1_res, prec5_res = utils.accuracy(logit_student.detach(),
                                                  labels,
                                                  topk=(1, 5))
            top1_res.update(prec1_res.item(), num_samples)
            top5_res.update(prec5_res.item(), num_samples)
            loss_avg.update(loss.detach().data.item(), num_samples)

        else:
            raise NameError("invalide stage nanme")

        epochs = args.baseline_epochs
        if step % 100 == 0 or step == len(data_loader) - 1:
            logger.log("Train, Epoch: [{:3d}/{}], Step: [{:3d}/{}], " \
                        "Loss: {:.4f}, Loss_dis: {:.4f}, Prec@(cnn1, res1, cnn5, res5): {:.4%},{:.4%}, {:.4%}, {:.4%}".format(
                            epoch, epochs, step, len(data_loader),
                            loss_avg.avg, mse_avg.avg, top1_cnn.avg, top1_res.avg, top5_cnn.avg, top5_res.avg))

        global_step += 1
    logger.log("m is {}".format(m))
    logger.log(
        "Train, Epoch: [{:3d}/{}], Final Prec: cnn, res@1: {:.4%}, {:.4%},  Final Prec: cnn, res@5: {:.4%}, {:.4%} Loss: {:.4f}"
        .format(epoch, epochs, top1_cnn.avg, top1_res.avg, top5_cnn.avg,
                top5_res.avg, loss_avg.avg))
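
The loss_KD_fn helper called in the distillation branches above is not defined on this page; the sketch below is a common Hinton-style distillation loss with the same call signature, so the exact weighting used by the original repository is an assumption.

import torch.nn.functional as F

def loss_KD_fn(criterion, logits_student, logits_teacher, targets, alpha, temperature):
    # soft-target term: KL divergence between temperature-softened distributions,
    # scaled by T^2 to keep gradient magnitudes comparable across temperatures
    soft = F.kl_div(
        F.log_softmax(logits_student / temperature, dim=1),
        F.softmax(logits_teacher / temperature, dim=1),
        reduction='batchmean') * (temperature ** 2)
    # hard-target term: ordinary cross-entropy against the ground-truth labels
    hard = criterion(logits_student, targets)
    return alpha * soft + (1. - alpha) * hard
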
Exemplo n.º 19
0
def valid(data_loader, model, criterion, epoch, global_step, stage, logger,
          args):

    loss_avg = utils.AverageMeter()
    top1_cnn = utils.AverageMeter()
    top5_cnn = utils.AverageMeter()
    top1_res = utils.AverageMeter()
    top5_res = utils.AverageMeter()
    global_step = epoch * len(data_loader)

    model.eval()
    logger.log("stage: {}".format(stage))
    with torch.no_grad():
        for step, (images, labels) in enumerate(data_loader):

            images = images.cuda(non_blocking=True)
            labels = labels.cuda(non_blocking=True)
            num_samples = images.size(0)

            if "TA" in stage or "JOINT" in stage or "KD" in stage or "KL" in stage:
                with torch.no_grad():
                    logits = model(images, stage='CNN_NMT')
                    logits_teacher = model(images, stage='RES_NMT')
                    prec1_cnn, prec5_cnn = utils.accuracy(logits.detach(),
                                                          labels,
                                                          topk=(1, 5))
                    prec1_res, prec5_res = utils.accuracy(
                        logits_teacher.detach(), labels, topk=(1, 5))
                    loss = criterion(logits, labels)
                loss_avg.update(loss.detach().item(), num_samples)
                top1_cnn.update(prec1_cnn.item(), num_samples)
                top5_cnn.update(prec5_cnn.item(), num_samples)
                top1_res.update(prec1_res.item(), num_samples)
                top5_res.update(prec5_res.item(), num_samples)

            elif "RES_NMT" in stage:
                logits = model(images, stage=stage)
                loss = criterion(logits, labels)
                prec1_res, prec5_res = utils.accuracy(logits,
                                                      labels,
                                                      topk=(1, 5))
                top1_res.update(prec1_res.item(), num_samples)
                top5_res.update(prec5_res.item(), num_samples)
                loss_avg.update(loss.data.item(), num_samples)

            elif "CNN_NMT" in stage:
                logits = model(images, stage=stage)
                loss = criterion(logits, labels)
                prec1_cnn, prec5_cnn = utils.accuracy(logits,
                                                      labels,
                                                      topk=(1, 5))
                top1_cnn.update(prec1_cnn.item(), num_samples)
                top5_cnn.update(prec5_cnn.item(), num_samples)
                loss_avg.update(loss.data.item(), num_samples)

            elif "RES_KD" in stage:
                logit_student, logits_teacher = model(images, stage=stage)
                loss = loss_KD_fn(criterion,
                                  logit_student,
                                  logits_teacher,
                                  targets=labels,
                                  alpha=args.alpha,
                                  temperature=args.temperature)
                prec1_res, prec5_res = utils.accuracy(logit_student.detach(),
                                                      labels,
                                                      topk=(1, 5))
                top1_res.update(prec1_res.item(), num_samples)
                top5_res.update(prec5_res.item(), num_samples)
                loss_avg.update(loss.detach().data.item(), num_samples)
            else:
                raise NameError("invalide stage nanme")

            epochs = args.baseline_epochs
            if step % 100 == 0 or step == len(data_loader) - 1:
                logger.log("Valid, Epoch: [{:3d}/{}], Step: [{:3d}/{}], " \
                            "Loss: {:.4f}, Prec@(cnn1, res1, cnn5, res5): {:.4%},{:.4%}, {:.4%}, {:.4%}".format(
                                epoch, epochs, step, len(data_loader),
                                loss_avg.avg, top1_cnn.avg, top1_res.avg, top5_cnn.avg, top5_res.avg))

            global_step += 1

        logger.log(
            "Valid, Epoch: [{:3d}/{}], Final Prec: cnn, res@1: {:.4%}, {:.4%},  Final Prec: cnn, res@5: {:.4%}, {:.4%} Loss: {:.4f}"
            .format(epoch, epochs, top1_cnn.avg, top1_res.avg, top5_cnn.avg,
                    top5_res.avg, loss_avg.avg))

        if "RES" in stage:
            return top1_res.avg
        else:
            return top1_cnn.avg
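
utils.accuracy, used throughout these examples, is also not shown here; it most likely follows the torchvision ImageNet reference implementation, sketched below as an assumption.

import torch

def accuracy(output, target, topk=(1,)):
    """Compute top-k precision (in percent) for the given logits and labels."""
    maxk = max(topk)
    batch_size = target.size(0)
    _, pred = output.topk(maxk, 1, True, True)   # indices of the top-k classes per sample
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))
    res = []
    for k in topk:
        correct_k = correct[:k].reshape(-1).float().sum(0)
        res.append(correct_k.mul_(100.0 / batch_size))
    return res
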
Exemplo n.º 20
0
def train(train_loader, model, criterion, optimizer, epoch, opt, logger=None):
    """
    train for one epoch on the training set
    """
    batch_time = utils.AverageMeter()
    losses = utils.AverageMeter()

    # training mode
    model.train()

    end = time.time()
    for i, (vfeat, afeat) in enumerate(train_loader):
        # shuffling the index orders
        bz = vfeat.size()[0]
        orders = np.arange(bz).astype('int32')
        shuffle_orders = orders.copy()
        np.random.shuffle(shuffle_orders)

        # creating a new data with the shuffled indices
        afeat2 = afeat[torch.from_numpy(shuffle_orders).long()].clone()

        # concat the vfeat and afeat respectively
        afeat0 = torch.cat((afeat, afeat2), 0)
        vfeat0 = torch.cat((vfeat, vfeat), 0)

        # generating the labels
        # 1. the labels for the shuffled feats
        label1 = (orders == shuffle_orders + 0).astype('float32')
        target1 = torch.from_numpy(label1)

        # 2. the labels for the original feats
        label2 = label1.copy()
        label2[:] = 1
        target2 = torch.from_numpy(label2)

        # concat the labels together
        target = torch.cat((target2, target1), 0)
        target = 1 - target

        # put the data into Variable
        vfeat_var = Variable(vfeat0).cuda()
        afeat_var = Variable(afeat0).cuda()
        target_var = Variable(target).cuda()

        # forward, backward optimize
        sim = model(vfeat_var, afeat_var)  # inference similarity
        loss = criterion(sim, target_var)

        # update loss in the loss meter
        losses.update(loss.data[0], vfeat0.size(0))

        # compute gradient and do sgd
        optimizer.zero_grad()
        loss.backward()

        # update parameters
        optimizer.step()

        # logger=None means no logger
        if logger:
            logger.add_scalar('loss', loss.data[0], epoch)

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        if i % opt.print_freq == 0:
            print(f'Epoch: [{epoch}][{i}/{len(train_loader)}]\t'
                  f'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  f'Loss {losses.val:.4f} ({losses.avg:.4f})')

    return losses.avg
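
utils.AverageMeter, the common thread across these examples, is usually the ImageNet-reference meter; since utils is not shown on this page, the sketch below is an assumption. It also covers the named form AverageMeter('Time', ':6.3f') used in the last example, and exposes the .val and .avg attributes the log strings read.

class AverageMeter(object):
    """Track the latest value, running sum, count, and average of a metric."""

    def __init__(self, name='', fmt=':f'):
        self.name = name
        self.fmt = fmt
        self.reset()

    def reset(self):
        self.val = 0.0
        self.avg = 0.0
        self.sum = 0.0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n      # weight the running sum by the number of samples n
        self.count += n
        self.avg = self.sum / self.count

    def __str__(self):
        fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})'
        return fmtstr.format(**self.__dict__)
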
Exemplo n.º 21
0
def train(train_loader, model, criterion, optimizer, epoch, opt):
    """
    train for one epoch on the training set
    """
    batch_time = utils.AverageMeter()
    losses = utils.AverageMeter()

    # training mode
    model.train()

    end = time.time()
    for i, (vfeat, afeat) in enumerate(train_loader):
        #pdb.set_trace()
        # shuffling the index orders
        bz = vfeat.size()[0]
        orders = np.arange(bz)
        shuffle_orders = orders.copy()
        np.random.shuffle(shuffle_orders)

        # creating a new data with the shuffled indices
        afeat2 = afeat[torch.from_numpy(shuffle_orders)].clone()

        # concat the vfeat and afeat respectively
        afeat0 = torch.cat((afeat, afeat2), 0)
        vfeat0 = torch.cat((vfeat, vfeat), 0)

        # generating the labels
        # 1. the labels for the shuffled feats
        label1 = (orders == shuffle_orders + 0).astype('float32')
        target1 = torch.from_numpy(label1)

        # 2. the labels for the original feats
        label2 = label1.copy()
        label2[:] = 1
        target2 = torch.from_numpy(label2)

        # concat the labels together
        target = torch.cat((target2, target1), 0)
        target = 1 - target

        # put the data into Variable
        vfeat_var = Variable(vfeat0)
        afeat_var = Variable(afeat0)
        target_var = Variable(target)

        # if you have gpu, then shift data to GPU
        if opt.cuda:
            vfeat_var = vfeat_var.cuda()
            afeat_var = afeat_var.cuda()
            target_var = target_var.cuda()

        #pdb.set_trace()
        # forward, backward optimize
        sim = model(vfeat_var, afeat_var)   # inference similarity
        loss = criterion(sim, target_var)   # compute contrastive loss

        ##############################
        # update loss in the loss meter
        ##############################
        losses.update(loss.data[0], vfeat0.size(0))

        ##############################
        # compute gradient and do sgd
        ##############################
        optimizer.zero_grad()
        loss.backward()

        ##############################
        # gradient clip stuff
        ##############################
        #utils.clip_gradient(optimizer, opt.gradient_clip)

        # update parameters
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        if i % opt.print_freq == 0:
            log_str = 'Epoch: [{0}][{1}/{2}]\t Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t Loss {loss.val:.4f} ({loss.avg:.4f})'.format(epoch, i, len(train_loader), batch_time=batch_time, loss=losses)
            mylog.info(log_str)
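
The matched/mismatched labels built at the top of this loop can be hard to read; the standalone snippet below replays the same logic for a batch of four (the batch size is arbitrary).

import numpy as np
import torch

bz = 4
orders = np.arange(bz)
shuffle_orders = orders.copy()
np.random.shuffle(shuffle_orders)

# 1 where the shuffled audio still lines up with its own video, 0 where it does not
label1 = (orders == shuffle_orders).astype('float32')
label2 = np.ones_like(label1)              # the original pairs always match
target = 1 - torch.from_numpy(np.concatenate((label2, label1)))
print(target)  # 0 marks a matched video/audio pair, 1 marks a mismatched one
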
Exemplo n.º 22
0
def train(train_loader, model, criterion, optimizer, epoch, opt):
    """
    train for one epoch on the training set
    """
    batch_time = utils.AverageMeter()
    losses = utils.AverageMeter()

    # training mode
    model.train()

    end = time.time()

    global dis1_rec
    global dis2_rec
    global loss_rec

    for i, (vfeat, afeat) in enumerate(train_loader):
        # shuffling the index orders
        bz = vfeat.size()[0]
        orders = np.arange(bz).astype('int32')
        shuffle_orders = orders.copy()
        np.random.shuffle(shuffle_orders)

        # creating a new data with the shuffled indices
        afeat2 = afeat[torch.from_numpy(shuffle_orders).long()].clone()

        # concat the vfeat and afeat respectively

        # generating the labels
        # 1. the labels for the shuffled feats
        label1 = (orders == shuffle_orders + 0).astype('float32')
        target1 = torch.from_numpy(label1)

        # 2. the labels for the original feats
        label2 = label1.copy()
        label2[:] = 1
        target2 = torch.from_numpy(label2)
        if np.random.randint(0, 100) % 2 == 0:
            # concat the labels together
            afeat0 = torch.cat((afeat, afeat2), 0)
            vfeat0 = torch.cat((vfeat, vfeat), 0)

            target = torch.cat((target2, target1), 0)
            target = 1 - target
        else:
            afeat0 = torch.cat((afeat2, afeat), 0)
            vfeat0 = torch.cat((vfeat, vfeat), 0)

            target = torch.cat((target1, target2), 0)
            target = 1 - target
        target = target.numpy()
        label = target.astype(np.int64)
        label = torch.from_numpy(label)
        label = label.view(label.size(0))
        #one_hot = torch.zeros(np.shape(target)[0], 2).scatter_(1, label, 1)
        one_hot = torch.LongTensor(label)
        # transpose the feats
        # vfeat0 = vfeat0.transpose(2, 1)
        # afeat0 = afeat0.transpose(2, 1)

        # put the data into Variable
        vfeat_var = Variable(vfeat0)
        afeat_var = Variable(afeat0)
        target_var = Variable(one_hot)

        # if you have gpu, then shift data to GPU
        if opt.cuda:
            vfeat_var = vfeat_var.cuda()
            afeat_var = afeat_var.cuda()
            target_var = target_var.cuda()

        # forward, backward optimize
        sim = model(vfeat_var, afeat_var,
                    train_status=True)  # inference similarity
        loss = criterion(sim, target_var)  # compute contrastive loss

        # record the loss and distance to plot later
        #dis1_rec.append(list(dis1.data)[0])
        #dis2_rec.append(list(dis2.data)[0])
        loss_rec.append(list(loss.data)[0])

        ##############################
        # update loss in the loss meter
        ##############################
        losses.update(loss.data[0], vfeat0.size(0))

        ##############################
        # compute gradient and do sgd
        ##############################
        optimizer.zero_grad()
        loss.backward()

        ##############################
        # gradient clip stuff
        ##############################
        # utils.clip_gradient(optimizer, opt.gradient_clip)

        # update parameters
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        if i % opt.print_freq == 0:
            log_str = 'Epoch: [{0}][{1}/{2}]\t Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t Loss {loss.val:.4f} ({loss.avg:.4f})'.format(
                epoch,
                i,
                len(train_loader),
                batch_time=batch_time,
                loss=losses)
            print(log_str)
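
Despite its name, the one_hot target built above holds class indices, which is exactly what nn.CrossEntropyLoss expects if that is the criterion in use (an assumption, since the criterion is created outside this snippet). A minimal check:

import torch
import torch.nn as nn

criterion = nn.CrossEntropyLoss()
sim = torch.randn(6, 2)                    # two-way matched/mismatched scores
labels = torch.tensor([0, 0, 0, 1, 1, 1])  # class indices, not one-hot vectors
print(criterion(sim, labels))
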
Exemplo n.º 23
0
def train(data_loader, model, criterion, optimizer_t, optimizer_m, epoch,
          stage, logger, args, epoch_dict):
    loss_avg = utils.AverageMeter()
    mse_avg = utils.AverageMeter()
    top1_cnn = utils.AverageMeter()
    top5_cnn = utils.AverageMeter()
    top1_res = utils.AverageMeter()
    top5_res = utils.AverageMeter()
    global_step = epoch * len(data_loader)
    model.train()
    if "TA" in stage:
        model.module.teacher.eval()
    else:
        model.module.teacher.train()

    logger.log("stage: {}".format(stage))
    m = Cosine(min_v=0.5, max_v=1., epoch=epoch)
    for step, (images, labels) in enumerate(data_loader):
        images = images.cuda(non_blocking=True)
        labels = labels.cuda(non_blocking=True)
        num_samples = images.size(0)
        optimizer_t.zero_grad()
        optimizer_m.zero_grad()
        if "TA" in stage:
            logits, logits_teacher, loss_dis = model(
                x=images,
                stage=stage,
                epoch=epoch,
                batch_pro=args.batch_pro,
                windowsize=args.windowsize)
            if stage == "TA1":
                loss = loss_KD_fn(criterion,
                                  logits,
                                  logits_teacher,
                                  targets=labels,
                                  alpha=args.alpha,
                                  temperature=args.temperature)
            elif stage == "TA2":
                loss = 0.
                for logit_student in logits[:-1]:

                    #loss += loss_KD_fn(criterion, logit_student, logits_teacher,
                    # targets = labels, alpha = args.alpha, temperature = args.temperature) * m * 0.25
                    loss += criterion(logit_student, labels) * m

                #loss += loss_KD_fn(criterion, logits[-1], logits_teacher,
                #                      targets = labels, alpha = args.alpha, temperature = args.temperature) * (1.0 - 3*m*0.25)
                loss += criterion(logits[-1], labels)
            loss_avg.update(loss.detach().item(), num_samples)
            if loss_dis is not None:
                for loss_d in loss_dis[:-1]:
                    loss += loss_d.mean() * m * 0.25 * args.dis_weight
                mse_avg.update(loss_dis[-1].detach().mean().item(),
                               num_samples)
                loss += loss_dis[-1].mean() * args.dis_weight

            #10^-3 for 32x32 image
            #10^-4 for 224x224 scale classification task
            #10^-5 for detection and segmentation task
            if isinstance(logits, list):
                prec1_cnn, prec5_cnn = utils.accuracy(logits[-1].detach(),
                                                      labels,
                                                      topk=(1, 5))
            else:
                prec1_cnn, prec5_cnn = utils.accuracy(logits.detach(),
                                                      labels,
                                                      topk=(1, 5))
            top1_cnn.update(prec1_cnn.item(), num_samples)
            top5_cnn.update(prec5_cnn.item(), num_samples)
        elif "RES_NMT" in stage:
            logits = model(images, stage=stage)
            loss = criterion(logits, labels)

            ## train mask
            mask = []
            mask_log = []
            for name, param in model.named_parameters():
                if 'mask' in name and "teacher" in name:
                    mask.append(param.view(-1))
                    mask_log.append(param.detach())

            mask = torch.cat(mask)
            error_sparse = args.sparse_lambda * torch.norm(mask, 1)
            error_sparse.backward()

            prec1_res, prec5_res = utils.accuracy(logits.detach(),
                                                  labels,
                                                  topk=(1, 5))
            top1_res.update(prec1_res.item(), num_samples)
            top5_res.update(prec5_res.item(), num_samples)
            loss_avg.update(loss.detach().data.item(), num_samples)
        elif "CNN_NMT" in stage:
            logits = model(images, stage=stage)
            loss = criterion(logits, labels)
            prec1_cnn, prec5_cnn = utils.accuracy(logits.detach(),
                                                  labels,
                                                  topk=(1, 5))
            top1_cnn.update(prec1_cnn.item(), num_samples)
            top5_cnn.update(prec5_cnn.item(), num_samples)
            loss_avg.update(loss.detach().data.item(), num_samples)
        elif "RES_KD" in stage:
            logit_student, logits_teacher = model(images, stage=stage)
            loss = loss_KD_fn(criterion,
                              logit_student,
                              logits_teacher,
                              targets=labels,
                              alpha=args.alpha,
                              temperature=args.temperature)
            prec1_res, prec5_res = utils.accuracy(logit_student.detach(),
                                                  labels,
                                                  topk=(1, 5))
            top1_res.update(prec1_res.item(), num_samples)
            top5_res.update(prec5_res.item(), num_samples)
            loss_avg.update(loss.detach().data.item(), num_samples)
        else:
            raise NameError("invalide stage nanme")
        loss.backward()
        optimizer_t.step()
        if epoch >= 1:
            optimizer_m.step()

        epochs = epoch_dict[stage]
        if step % 100 == 0 or step == len(data_loader) - 1:
            logger.log("Train, Epoch: [{:3d}/{}], Step: [{:3d}/{}], " \
                        "Loss: {:.4f}, Loss_dis: {:.4f}, Prec@(cnn1, res1, cnn5, res5): {:.4%},{:.4%}, {:.4%}, {:.4%}".format(
                            epoch, epochs, step, len(data_loader),
                            loss_avg.avg, mse_avg.avg, top1_cnn.avg, top1_res.avg, top5_cnn.avg, top5_res.avg))

        global_step += 1
    logger.log("mask:")
    logger.log(mask_log)
    logger.log(
        "Train, Epoch: [{:3d}/{}], Final Prec: cnn, res@1: {:.4%}, {:.4%},  Final Prec: cnn, res@5: {:.4%}, {:.4%} Loss: {:.4f}"
        .format(epoch, epochs, top1_cnn.avg, top1_res.avg, top5_cnn.avg,
                top5_res.avg, loss_avg.avg))
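
The Cosine helper that produces the weight m is not shown on this page; a plausible sketch of a cosine ramp matching the call Cosine(min_v=0.5, max_v=1., epoch=epoch) is given below, with total_epochs as a hypothetical extra parameter since the real period is not visible here.

import math

def Cosine(min_v, max_v, epoch, total_epochs=100):
    # cosine ramp from min_v at epoch 0 up to max_v at total_epochs
    # (total_epochs is an assumed default, not taken from the original code)
    t = min(epoch, total_epochs) / float(total_epochs)
    return min_v + 0.5 * (max_v - min_v) * (1. - math.cos(math.pi * t))
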
Exemplo n.º 24
0
def trainer(train_loader,
            valid_loader,
            model,
            criterion,
            optimizer_t,
            optimizer_s=None,
            lr_scheduler=None,
            stage=None):
    logger.log("start training..." + stage)
    best_top1 = 0.0
    epochs = args.baseline_epochs
    start_time = time.time()
    epoch_time = utils.AverageMeter()

    for epoch in range(args.start_epoch, epochs):
        ##################################adjust learning rate##################################
        if args.lr_sch == "cosine":
            if optimizer_t is not None:
                adjust_learning_rateD(optimizer_t,
                                      epoch,
                                      epochs,
                                      lr_max=args.learning_rate,
                                      lr_min=args.learning_rate * 1e-3)
            if optimizer_s is not None:
                adjust_learning_rateD(optimizer_s,
                                      epoch,
                                      epochs,
                                      lr_max=args.learning_rate,
                                      lr_min=args.learning_rate * 1e-3)
        elif args.lr_sch == "imagenet":
            if optimizer_t is not None:
                adjust_learning_rateA(optimizer_t, epoch, args)
            if optimizer_s is not None:
                adjust_learning_rateA(optimizer_s, epoch, args)
        elif args.lr_sch == "step":
            if optimizer_t is not None:
                adjust_learning_rateS(optimizer_t, epoch, args)
            if optimizer_s is not None:
                adjust_learning_rateS(optimizer_s, epoch, args)
        else:
            raise NameError("lrsch name error")
        ########################################################################################

        lr = optimizer_t.param_groups[0][
            "lr"] if optimizer_t else optimizer_s.param_groups[0]["lr"]
        need_hour, need_mins, need_secs = convert_secs2time(epoch_time.val *
                                                            (epochs - epoch))
        need_time = '[Need: {:02d}:{:02d}:{:02d}]'.format(
            need_hour, need_mins, need_secs)
        logger.log(' [{:s}] :: {:3d}/{:3d} ----- [{:s}] {:s} LR={:}'.format(
            args.smodel_name, epoch, epochs, time_string(), need_time, lr))
        train(train_loader, model, criterion, optimizer_t, optimizer_s, epoch,
              stage, logger, args)
        global_step = (epoch + 1) * len(train_loader) - 1
        valid_top1 = valid(valid_loader,
                           model,
                           criterion,
                           epoch,
                           global_step,
                           stage=stage,
                           logger=logger,
                           args=args)

        if epoch == 0 or best_top1 < valid_top1:
            best_top1 = valid_top1
            is_best = True
        else:
            is_best = False

        if epoch >= 89:
            utils.save_checkpoint(model,
                                  logger.path('info'),
                                  is_best=is_best,
                                  pre=args.aim + "_" + "epoch_" + str(epoch) +
                                  "_" + stage)

        epoch_time.update(time.time() - start_time)
        start_time = time.time()

    logger.log("Final best valid Prec@1: {:.4%}".format(best_top1))
Exemplo n.º 25
0
def train(train_loader, model, criterion, optimizer, epoch, opt, num):
    """
    train for one epoch on the training set
    """
    batch_time = utils.AverageMeter()
    losses = utils.AverageMeter()

    # training mode
    model.train()

    end = time.time()

    global positive_rec
    global negative_rec
    global loss_rec

    for i, (vfeat, afeat) in enumerate(train_loader):
        # shuffling the index orders

        bz = vfeat.size()[0]
        orders = np.arange(bz).astype('int32')
        shuffle_orders = orders.copy()
        np.random.shuffle(shuffle_orders)

        # creating a new data with the shuffled indices
        afeat2 = afeat[torch.from_numpy(shuffle_orders).long()].clone()

        # concat the vfeat and afeat respectively
        afeat0 = torch.cat((afeat, afeat2), 0)
        vfeat0 = torch.cat((vfeat, vfeat), 0)

        # generating the labels
        # 1. the labels for the shuffled feats
        label1 = (orders == shuffle_orders + 0).astype('float32')
        target1 = torch.from_numpy(label1)

        # 2. the labels for the original feats
        label2 = label1.copy()
        label2[:] = 1
        target2 = torch.from_numpy(label2)

        # concat the labels together
        target = torch.cat((target2, target1), 0)
        target = 1 - target

        # transpose the feats
        # vfeat0 = vfeat0.transpose(2, 1)
        # afeat0 = afeat0.transpose(2, 1)

        # put the data into Variable
        vfeat_var = Variable(vfeat0)
        afeat_var = Variable(afeat0)
        target_var = Variable(target)

        # if you have gpu, then shift data to GPU
        if opt.cuda:
            vfeat_var = vfeat_var.cuda()
            afeat_var = afeat_var.cuda()
            target_var = target_var.cuda()
        sim = model(vfeat_var, afeat_var)
        loss = criterion(sim, target_var)

        loss_rec.append(list(loss.data)[0])
        positive_rec.append(list(torch.mean(sim[0:bz, 0]).data)[0])
        negative_rec.append(list(torch.mean(sim[bz:bz * 2, 0]).data)[0])

        # ##### for N pair loss
        # vfeat = Variable(vfeat)
        # afeat = Variable(afeat)
        # if opt.cuda:
        #     vfeat = vfeat.cuda()
        #     afeat = afeat.cuda()
        # bz = vfeat.size()[0]
        # for k in np.arange(bz):
        #     cur_vfeat = vfeat[k].clone()
        #     vfeat_k = cur_vfeat.repeat(bz, 1, 1)
        #     sim_k = model(vfeat_k, afeat)
        #     sim_k_0 = sim_k[:, 0]
        #     sim_k_1 = sim_k[:, 1]
        #     sim_k_0 = sim_k_0.resize(1, bz)
        #     sim_k_1 = sim_k_1.resize(1, bz)
        #     if k == 0:
        #         sim_0 = sim_k_0.clone()
        #         sim_1 = sim_k_1.clone()
        #     else:
        #         sim_0 = torch.cat((sim_0, sim_k_0), dim=0)
        #         sim_1 = torch.cat((sim_1, sim_k_1), dim=0)
        # loss = criterion(sim_0, sim_1)
        #
        # loss_rec.append(list(loss.data)[0])
        # positive_rec.append(list(torch.mean(torch.diag(sim_0)).data)[0])
        # sim_0 = sim_0 - torch.diag(torch.diag(sim_0))
        # negative_rec.append(list(torch.mean(sim_0).data)[0])

        ##############################
        # update loss in the loss meter
        ##############################
        losses.update(loss.data[0], vfeat.size(0))

        ##############################
        # compute gradient and do sgd
        ##############################
        optimizer.zero_grad()
        loss.backward()

        ##############################
        # gradient clip stuff
        ##############################
        torch.nn.utils.clip_grad_norm(model.parameters(), opt.gradient_clip)

        # update parameters
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        if i % opt.print_freq == 0:
            log_str = 'No.{} Epoch: [{}][{}/{}]\t Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t Loss {loss.val:.4f} ({loss.avg:.4f})'.format(
                num,
                epoch,
                i,
                len(train_loader),
                batch_time=batch_time,
                loss=losses)
            print(log_str)
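
The clipping call above uses the pre-0.4 spelling torch.nn.utils.clip_grad_norm; on current PyTorch the in-place clip_grad_norm_ is the supported name. Likewise, the Variable wrapper and loss.data[0] indexing seen in these examples are pre-0.4 idioms whose modern equivalents are plain tensors and loss.item(). A standalone check of the modern clipping call:

import torch
import torch.nn as nn

model = nn.Linear(10, 2)
loss = model(torch.randn(4, 10)).sum()
loss.backward()
# clip_grad_norm_ clips in place and returns the total gradient norm before clipping
total_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
print(total_norm)
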
Exemplo n.º 26
0
def main():
    parser = argparse.ArgumentParser(
        description="Configuration for training an APC model")

    parser.add_argument("--test_config")
    parser.add_argument("--use_cmvn",
                        default=False,
                        action='store_true',
                        help="Use cmvn or not")
    parser.add_argument("--batch_size",
                        default=32,
                        type=int,
                        help="Training minibatch size")
    parser.add_argument("--load_data_workers",
                        default=2,
                        type=int,
                        help="Number of parallel data loaders")
    parser.add_argument("--resume_model",
                        default='',
                        type=str,
                        help="Use cmvn or not")
    parser.add_argument("--print_freq",
                        default=100,
                        type=int,
                        help="Number of iter to print")
    parser.add_argument("--out_prob",
                        type=str,
                        help="output file to store phrase id log prob")
    parser.add_argument("--out_embedding",
                        type=str,
                        help="name of output embedding ark and scp file")
    parser.add_argument("--seed",
                        default=1,
                        type=int,
                        help="random number seed")

    args = parser.parse_args()
    with open(args.test_config) as f:
        config = yaml.safe_load(f)

    config['path_test'] = [j for i, j in config['test_data'].items()]
    for key, val in config.items():
        setattr(args, key, val)
    for var in vars(args):
        config[var] = getattr(args, var)
    print("Experiment starts with config {}".format(
        json.dumps(config, sort_keys=True, indent=4)))

    use_cuda = args.use_gpu
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)
    if use_cuda:
        torch.cuda.manual_seed(args.seed)

    enc_args = Config()
    for key, val in args.encoder.items():
        setattr(enc_args, key, val)
    shared_encoder = make_se(enc_args.input_size, enc_args.N, enc_args.N_embed,
                             enc_args.d_model, enc_args.d_ff, enc_args.h,
                             enc_args.dropout)

    dec_args = Config()
    for key, val in args.asr_decoder.items():
        setattr(dec_args, key, val)
    dec_args.d_model = enc_args.d_model
    model = make_model(shared_encoder, dec_args)

    if args.resume_model:
        resume_model = torch.load(args.resume_model, map_location='cpu')
        model.load_state_dict(resume_model)

    num_params = 0
    for name, param in model.named_parameters():
        num_params += param.numel()
    print("Number of parameters: {}".format(num_params))

    if use_cuda:
        model = model.cuda()

    testset = SpeechDataset(args.path_test, args.left_ctx, args.right_ctx,
                            args.skip_frame)
    if args.use_cmvn:
        testset._load_cmvn(args.global_cmvn)
    test_loader = SpeechDataLoader(testset,
                                   args.batch_size,
                                   num_workers=args.load_data_workers,
                                   shuffle=False)
    print("Finish Loading test files. Number batches: {}".format(
        len(test_loader)))

    batch_time = utils.AverageMeter('Time', ':6.3f')
    progress = utils.ProgressMeter(len(test_loader), batch_time)
    end = time.time()

    ark_writer = WriteHelper('ark,scp:{}.ark,{}.scp'.format(
        args.out_embedding, args.out_embedding))
    prob_writer = open(args.out_prob, 'w')

    with torch.no_grad():
        model.eval()
        for i, data in enumerate(test_loader):
            utt_list, feats, _, feat_sizes, _, _, _ = data
            batch_size, mask_size, _ = feats.size()
            feat_sizes /= 2

            if args.use_gpu:
                feats = feats.cuda()
                feat_sizes = feat_sizes.cuda()

            _, phrase_out, t_embedding = model(feats, feat_sizes)

            logprob = F.log_softmax(phrase_out, dim=-1)
            for j in range(len(utt_list)):
                ark_writer(utt_list[j], t_embedding[j].cpu().numpy())
                prob_writer.write(utt_list[j] + ' ' +
                                  str(logprob[j].cpu().numpy()) + '\n')

            batch_time.update(time.time() - end)
            if i % args.print_freq == 0:
                progress.print(i)
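
utils.ProgressMeter is not defined here either; the call ProgressMeter(len(test_loader), batch_time) together with progress.print(i) matches the older torchvision ImageNet reference class, sketched below as an assumption (it pairs with the AverageMeter sketch earlier on this page, whose __str__ supplies each meter's line).

class ProgressMeter(object):
    """Format the batch index plus each meter's current/average value on one line."""

    def __init__(self, num_batches, *meters, prefix=""):
        self.batch_fmtstr = self._get_batch_fmtstr(num_batches)
        self.meters = meters
        self.prefix = prefix

    def print(self, batch):
        entries = [self.prefix + self.batch_fmtstr.format(batch)]
        entries += [str(meter) for meter in self.meters]
        print('\t'.join(entries))

    @staticmethod
    def _get_batch_fmtstr(num_batches):
        num_digits = len(str(num_batches))
        fmt = '{:' + str(num_digits) + 'd}'
        return '[' + fmt + '/' + fmt.format(num_batches) + ']'
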