Example #1
def train(batch_processor, train_loader, model, criterion, optimizer, epoch,
          args, logger):
    batch_time = tools.AverageMeter('Time', ':6.3f')
    data_time = tools.AverageMeter('Data', ':6.3f')
    losses = tools.AverageMeter('Loss', ':.4e')
    progress = tools.ProgressMeter(len(train_loader),
                                   [batch_time, data_time, losses],
                                   prefix="Epoch: [{}]".format(epoch),
                                   logger=logger)

    # fix BN
    if args.fixed_BN:
        model.eval()
    else:
        model.train()
    criterion.train()
    end = time.time()
    for i, batch_data in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        batch_processor(args.gpu, batch_data, model, criterion, optimizer,
                        losses)

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            progress.display(i)
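The tools/utils meter helpers used throughout these examples are not shown. Below is a minimal sketch in the style of the PyTorch ImageNet reference example; the names and signatures are assumptions (some snippets pass meters as a list, others as separate positional arguments), so each project's own helpers may differ:

class AverageMeter:
    """Track the latest value, running sum, count, and average of a metric."""

    def __init__(self, name, fmt=':f'):
        self.name, self.fmt = name, fmt
        self.reset()

    def reset(self):
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

    def __str__(self):
        fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})'
        return fmtstr.format(**self.__dict__)


class ProgressMeter:
    """Print the batch index alongside every registered meter."""

    def __init__(self, num_batches, meters, prefix=""):
        self.num_batches = num_batches
        self.meters = meters
        self.prefix = prefix

    def display(self, batch):
        entries = [self.prefix + '[{}/{}]'.format(batch, self.num_batches)]
        entries += [str(meter) for meter in self.meters]
        print('\t'.join(entries))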
Example #2
def infer(valid_queue, model, epoch, Latency, criterion, writer):
    batch_time = utils.AverageMeters('Time', ':6.3f')
    losses = utils.AverageMeters('Loss', ':.4e')
    top1 = utils.AverageMeters('Acc@1', ':6.2f')
    top5 = utils.AverageMeters('Acc@5', ':6.2f')

    # set chosen op active
    model.module.set_chosen_op_active()
    model.module.unused_modules_off()

    model.eval()

    progress = utils.ProgressMeter(len(valid_queue), batch_time, losses, top1, top5,
                                   prefix='Test: ')
    cur_step = epoch*len(valid_queue)

    end = time.time()
    with torch.no_grad():
        for step, (input, target) in enumerate(valid_queue):
            # Variable/volatile is deprecated; torch.no_grad() already disables autograd here.
            input = input.cuda()
            target = target.cuda(non_blocking=True)
            logits = model(input)
            loss = criterion(logits, target)
            acc1, acc5 = utils.accuracy(logits, target, topk=(1, 5))
            n = input.size(0)
            reduced_loss = reduce_tensor(
                loss.data, world_size=config.world_size)
            acc1 = reduce_tensor(acc1, world_size=config.world_size)
            acc5 = reduce_tensor(acc5, world_size=config.world_size)
            losses.update(to_python_float(reduced_loss), n)
            top1.update(to_python_float(acc1), n)
            top5.update(to_python_float(acc5), n)

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            shape = [1, 3, 224, 224]
            input_var = torch.zeros(shape, device=device)
            flops = model.module.get_flops(input_var)
            if config.target_hardware in [None, 'flops']:
                latency = 0
            else:
                latency = Latency.predict_latency(model)

            model.module.unused_modules_back()

            if step % config.print_freq == 0:
                progress.print(step)
                logger.info('valid %03d\t loss: %e\t top1: %f\t top5: %f\t flops: %f\t latency: %f', step,
                            losses.avg, top1.avg, top5.avg, flops/1e6, latency)

    writer.add_scalar('val/loss', losses.avg, cur_step)
    writer.add_scalar('val/top1', top1.avg, cur_step)
    writer.add_scalar('val/top5', top5.avg, cur_step)
    return top1.avg, losses.avg
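reduce_tensor and to_python_float are also external to this snippet. In multi-GPU runs they typically average a metric across workers; a sketch of that common torch.distributed pattern follows (the helper names are assumptions):

import torch.distributed as dist


def reduce_tensor(tensor, world_size):
    # All-reduce (sum) across workers, then divide to get the mean.
    rt = tensor.clone()
    dist.all_reduce(rt, op=dist.ReduceOp.SUM)
    rt /= world_size
    return rt


def to_python_float(t):
    # Convert a 0-dim tensor (or plain number) to a Python float.
    return t.item() if hasattr(t, 'item') else float(t)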
Example #3
def train(train_loader, model, criterion, optimizer, epoch, args):
    AverageMeter = utils.AverageMeter
    batch_time = AverageMeter('Time', ':6.3f')
    data_time = AverageMeter('Data', ':6.3f')
    losses = AverageMeter('Loss', ':f')
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    progress = utils.ProgressMeter(
        len(train_loader),
        [batch_time, data_time, losses, top1, top5],
        prefix="Epoch: [{}]".format(epoch+1))

    # switch to train mode
    model.train()

    end = time.time()  

    pth_file_name = os.path.join(args.train_local, 'train_epoch_%s.npy'
                                        % (str(epoch + 1)))

    for i, (images, target) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        if args.gpu is not None:
            images = images.cuda(args.gpu, non_blocking=True)
        target = target.cuda(args.gpu, non_blocking=True)
        # target2 = target[1].cuda(args.gpu, non_blocking=True)
        # lam = target[2]
        # perform mixup (disabled)
        # images, targets_a, targets_b, lam = mixup_data(images, target, 1.)

        # compute output
        output = model(images)
        # loss = lam * criterion(output, target1) + (1 - lam) * criterion(output, target2)
        loss = criterion(output, target)  # cross-entropy loss, as before
        # loss = mixip_criterion(criterion, output, targets_a, targets_b, lam)
        
        # print(loss.item())
        # measure accuracy and record loss
        acc1, acc5 = utils.accuracy(output, target, topk=(1, 5))

   
        losses.update(loss.item(), images.size(0))
        top1.update(acc1[0], images.size(0))
        top5.update(acc5[0], images.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            progress.display(i)
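utils.accuracy is used in most of these loops; it is usually the top-k accuracy helper from the PyTorch ImageNet example, sketched here as a reference (each project's own utils may differ):

import torch


def accuracy(output, target, topk=(1,)):
    """Compute top-k accuracy (in percent) for logits `output` and labels `target`."""
    with torch.no_grad():
        maxk = max(topk)
        batch_size = target.size(0)

        # Top-k predicted class indices, shape (maxk, batch).
        _, pred = output.topk(maxk, dim=1, largest=True, sorted=True)
        pred = pred.t()
        correct = pred.eq(target.view(1, -1).expand_as(pred))

        res = []
        for k in topk:
            correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
            res.append(correct_k.mul_(100.0 / batch_size))
        return res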
Example #4
def train(args, model, train_loader, optimizer, epoch, training_step, writer):
    losses = utils.AverageMeter("Loss", ":.6f")
    progress = utils.ProgressMeter(len(train_loader), [losses],
                                   prefix="Epoch: [{}]".format(epoch))
    model.train()
    for batch_idx, batch in enumerate(train_loader):
        batch = [tensor.cuda() for tensor in batch]
        (
            obs_traj,
            pred_traj_gt,
            obs_traj_rel,
            pred_traj_gt_rel,
            non_linear_ped,
            loss_mask,
            seq_start_end,
        ) = batch
        optimizer.zero_grad()
        loss = torch.zeros(1).to(pred_traj_gt)
        l2_loss_rel = []
        loss_mask = loss_mask[:, args.obs_len:]

        if training_step == 1 or training_step == 2:
            model_input = obs_traj_rel
            pred_traj_fake_rel = model(model_input, obs_traj, seq_start_end, 1,
                                       training_step)
            l2_loss_rel.append(
                l2_loss(pred_traj_fake_rel, model_input, loss_mask,
                        mode="raw"))
        else:
            model_input = torch.cat((obs_traj_rel, pred_traj_gt_rel), dim=0)
            for _ in range(args.best_k):
                pred_traj_fake_rel = model(model_input, obs_traj,
                                           seq_start_end, 0)
                l2_loss_rel.append(
                    l2_loss(
                        pred_traj_fake_rel,
                        model_input[-args.pred_len:],
                        loss_mask,
                        mode="raw",
                    ))

        l2_loss_sum_rel = torch.zeros(1).to(pred_traj_gt)
        l2_loss_rel = torch.stack(l2_loss_rel, dim=1)
        for start, end in seq_start_end.data:
            _l2_loss_rel = torch.narrow(l2_loss_rel, 0, start, end - start)
            _l2_loss_rel = torch.sum(_l2_loss_rel, dim=0)  # [20]
            _l2_loss_rel = torch.min(_l2_loss_rel) / (
                (pred_traj_fake_rel.shape[0]) * (end - start))
            l2_loss_sum_rel += _l2_loss_rel

        loss += l2_loss_sum_rel
        losses.update(loss.item(), obs_traj.shape[1])
        loss.backward()
        optimizer.step()
        if batch_idx % args.print_every == 0:
            progress.display(batch_idx)
    writer.add_scalar("train_loss", losses.avg, epoch)
Example #5
def train(train_loader, model, criterion, optimizer, epoch, args):
    AverageMeter = utils.AverageMeter
    batch_time = AverageMeter('Time', ':6.3f')
    data_time = AverageMeter('Data', ':6.3f')
    losses = AverageMeter('Loss', ':f')
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    progress = utils.ProgressMeter(len(train_loader),
                                   [batch_time, data_time, losses, top1, top5],
                                   prefix="Epoch: [{}]".format(epoch + 1))

    # switch to train mode
    model.train()

    end = time.time()
    for i, (images, target) in enumerate(train_loader):

        # print(images.shape)
        # print(target.size())
        # exit()

        # measure data loading time
        data_time.update(time.time() - end)

        # `async=True` is invalid in Python 3.7+ (renamed to non_blocking);
        # Variable wrapping is no longer needed.
        images = images.cuda(non_blocking=True)
        target = target.cuda(non_blocking=True)

        # forward
        output = model(images)

        loss = criterion(output, target)
        # print(loss.item())
        # exit()

        # measure accuracy and record loss
        acc1, acc5 = utils.accuracy(output.data, target.data, topk=(1, 5))
        losses.update(loss.item(), images.size(0))
        top1.update(acc1[0], images.size(0))
        top5.update(acc5[0], images.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            progress.display(i)

    return (losses.avg, top1.avg, top5.avg)
Example #6
def validate_warmup(valid_queue, model, epoch, criterion, writer):
    batch_time = utils.AverageMeters('Time', ':6.3f')
    losses = utils.AverageMeters('Loss', ':.4e')
    top1 = utils.AverageMeters('Acc@1', ':6.2f')
    top5 = utils.AverageMeters('Acc@5', ':6.2f')
    model.train()

    progress = utils.ProgressMeter(len(valid_queue),
                                   batch_time,
                                   losses,
                                   top1,
                                   top5,
                                   prefix='Warmup-Test: ')
    cur_step = epoch * len(valid_queue)

    end = time.time()
    with torch.no_grad():
        for step, (input, target) in enumerate(valid_queue):
            # Variable/volatile is deprecated; torch.no_grad() already disables autograd here.
            input = input.cuda()
            target = target.cuda(non_blocking=True)
            logits = model(input)
            loss = criterion(logits, target)
            acc1, acc5 = utils.accuracy(logits, target, topk=(1, 5))
            n = input.size(0)

            losses.update(loss, n)
            top1.update(acc1, n)
            top5.update(acc5, n)

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if step % config.print_freq == 0:
                progress.print(step)
                logger.info('warmup-valid %03d %e %f %f', step, losses.avg,
                            top1.avg, top5.avg)

    writer.add_scalar('warmup-val/loss', losses.avg, cur_step)
    writer.add_scalar('warmup-val/top1', top1.avg, cur_step)
    writer.add_scalar('warmup-val/top5', top5.avg, cur_step)
    return top1.avg, top5.avg, losses.avg
Example #7
def validate(val_loader, model, criterion, args):
    global best_acc1
    AverageMeter = utils.AverageMeter
    batch_time = AverageMeter('Time', ':6.3f')
    losses = AverageMeter('Loss', ':f')
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    progress = utils.ProgressMeter(len(val_loader),
                                   [batch_time, losses, top1, top5],
                                   prefix='Test: ')

    # switch to evaluate mode
    model.eval()

    with torch.no_grad():
        end = time.time()
        for i, (images, target) in enumerate(val_loader):
            images = images.cuda()
            target = target.cuda()

            # compute output
            output = model(images)
            loss = criterion(output, target)

            # measure accuracy and record loss
            acc1, acc5 = utils.accuracy(output.data, target.data, topk=(1, 5))
            losses.update(loss.item(), images.size(0))
            top1.update(acc1[0], images.size(0))
            top5.update(acc5[0], images.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % args.print_freq == 0:
                progress.display(i)

        # TODO: this should also be done with the ProgressMeter
        print(' * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}'.format(top1=top1,
                                                                    top5=top5))

    return losses.avg, top1.avg, top5.avg
Example #8
def validate(args, model, val_loader, epoch, writer):
    ade = utils.AverageMeter("ADE", ":.6f")
    fde = utils.AverageMeter("FDE", ":.6f")
    progress = utils.ProgressMeter(len(val_loader), [ade, fde],
                                   prefix="Test: ")

    model.eval()
    with torch.no_grad():
        for i, batch in enumerate(val_loader):
            batch = [tensor.cuda() for tensor in batch]
            (
                obs_traj,
                pred_traj_gt,
                obs_traj_rel,
                pred_traj_gt_rel,
                non_linear_ped,
                loss_mask,
                seq_start_end,
            ) = batch
            loss_mask = loss_mask[:, args.obs_len:]
            pred_traj_fake_rel = model(obs_traj_rel, obs_traj, seq_start_end)

            pred_traj_fake_rel_predpart = pred_traj_fake_rel[-args.pred_len:]
            pred_traj_fake = relative_to_abs(pred_traj_fake_rel_predpart,
                                             obs_traj[-1])
            ade_, fde_ = cal_ade_fde(pred_traj_gt, pred_traj_fake)
            ade_ = ade_ / (obs_traj.shape[1] * args.pred_len)

            fde_ = fde_ / (obs_traj.shape[1])
            ade.update(ade_, obs_traj.shape[1])
            fde.update(fde_, obs_traj.shape[1])

            if i % args.print_every == 0:
                progress.display(i)

        logging.info(" * ADE  {ade.avg:.3f} FDE  {fde.avg:.3f}".format(
            ade=ade, fde=fde))
        writer.add_scalar("val_ade", ade.avg, epoch)
    return ade.avg
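relative_to_abs converts predicted relative displacements back to absolute coordinates; a minimal sketch of the usual cumulative-sum implementation, with the signature assumed from how it is called above:

import torch


def relative_to_abs(rel_traj, start_pos):
    # rel_traj: (pred_len, batch, 2) per-step displacements
    # start_pos: (batch, 2) last observed absolute position
    displacement = torch.cumsum(rel_traj, dim=0)
    return displacement + start_pos.unsqueeze(0)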
Example #9
def train(train_loader, model, optimizer, epoch, args):
    batch_time = utils.AverageMeter('Time', '6.3f')
    data_time = utils.AverageMeter('Data', '6.3f')

    # save images to investigate
    inv_normalize = transforms.Normalize(
        mean=[-0.485 / 0.229, -0.456 / 0.224, -0.406 / 0.225],
        std=[1 / 0.229, 1 / 0.224, 1 / 0.225])

    inv_transform = transforms.Compose(
        [inv_normalize, transforms.ToPILImage()])

    os.makedirs("{}/train_images".format(args.save_folder), exist_ok=True)
    img_ctr = 0

    loss_meters = []
    loss_updates = []

    meter = utils.AverageMeter('Total Loss', '.4e')
    loss_updates.append(
        (lambda m: lambda _, l_total, bs: m.update(l_total, bs)
         )(meter))  # lam for closure
    loss_meters.extend([meter, utils.ProgressMeter.BR])

    if args.moco_contr_w != 0:
        meter = utils.AverageMeter('Contr-Loss', '.4e')
        acc1 = utils.AverageMeter('Contr-Acc1', '6.2f')
        acc5 = utils.AverageMeter('Contr-Acc5', '6.2f')

        def f(meter, macc1, macc5):  # closure
            def accuracy(output, target=0, topk=(1, )):
                """Computes the accuracy over the k top predictions for the specified values of k"""
                with torch.no_grad():
                    maxk = max(topk)
                    batch_size = output.size(0)

                    _, pred = output.topk(maxk, 1, True, True)
                    pred = pred.t()
                    correct = (pred == 0)

                    res = []
                    for k in topk:
                        correct_k = correct[:k].view(-1).float().sum()
                        res.append(correct_k.mul_(100.0 / batch_size))
                    return res

            def update(losses, _, bs):
                meter.update(losses.loss_contr, bs)
                acc1, acc5 = accuracy(losses.logits_contr, topk=(1, 5))
                macc1.update(acc1, bs)
                macc5.update(acc5, bs)

            return update

        loss_updates.append(f(meter, acc1, acc5))
        loss_meters.extend([meter, acc1, acc5, utils.ProgressMeter.BR])

    if args.moco_align_w != 0:
        meter = utils.AverageMeter('Align-Loss', '.4e')
        loss_updates.append(
            (lambda m: lambda losses, _, bs: m.update(losses.loss_align, bs)
             )(meter))  # lam for closure
        loss_meters.append(meter)

    if args.moco_unif_w != 0:
        meter = utils.AverageMeter('Unif-Loss', '.4e')
        loss_updates.append(
            (lambda m: lambda losses, _, bs: m.update(losses.loss_unif)
             )(meter))  # lam for closure
        loss_meters.append(meter)

    if len(loss_meters) and loss_meters[-1] == utils.ProgressMeter.BR:
        loss_meters = loss_meters[:-1]

    progress = utils.ProgressMeter(len(train_loader),
                                   [batch_time, data_time] + loss_meters,
                                   prefix="Epoch: [{}]".format(epoch))

    # switch to train mode
    model.train()

    end = time.time()
    # for i, (images, _) in enumerate(train_loader):
    for i, (_, images, target, _) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        images[0] = images[0].cuda(args.gpu, non_blocking=True)
        images[1] = images[1].cuda(args.gpu, non_blocking=True)

        # save images to investigate
        if epoch == 0 and i < 10:
            for batch_index in range(images[0].size(0)):
                if int(target[batch_index].item()) == 26:
                    img_ctr = img_ctr + 1
                    inv_image1 = inv_transform(images[0][batch_index].cpu())
                    inv_image1.save(
                        "{}/train_images/".format(args.save_folder) +
                        str(img_ctr).zfill(5) + '_view_0' + '.png')
                    inv_image2 = inv_transform(images[1][batch_index].cpu())
                    inv_image2.save(
                        "{}/train_images/".format(args.save_folder) +
                        str(img_ctr).zfill(5) + '_view_1' + '.png')

        # compute losses
        moco_losses = model(im_q=images[0], im_k=images[1])
        total_loss = moco_losses.combine(contr_w=args.moco_contr_w,
                                         align_w=args.moco_align_w,
                                         unif_w=args.moco_unif_w)

        # record loss
        if args.index == 0:
            bs = images[0].shape[0]
            for update_fn in loss_updates:
                update_fn(moco_losses, total_loss, bs)

        # compute gradient and do SGD step
        optimizer.zero_grad()
        total_loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0 and args.index == 0:
            progress.display(i)
Example #10
def train(train_queue, valid_queue, model, criterion, LatencyLoss, optimizer,
          alpha_optimizer, lr, epoch, writer, update_schedule):

    # use the builtin sum: np.sum over a generator is deprecated
    arch_param_num = sum(
        np.prod(params.size()) for params in model.module.arch_parameters())
    binary_gates_num = len(list(model.module.binary_gates()))
    weight_param_num = len(list(model.module.weight_parameters()))
    print('#arch_params: %d\t#binary_gates: %d\t#weight_params: %d' %
          (arch_param_num, binary_gates_num, weight_param_num))

    batch_time = utils.AverageMeters('Time', ':6.3f')
    data_time = utils.AverageMeters('Data', ':6.3f')
    losses = utils.AverageMeters('Loss', ':.4e')
    top1 = utils.AverageMeters('Acc@1', ':6.2f')
    top5 = utils.AverageMeters('Acc@5', ':6.2f')
    entropy = utils.AverageMeters('Entropy', ':.4e')

    progress = utils.ProgressMeter(len(train_queue),
                                   batch_time,
                                   data_time,
                                   losses,
                                   top1,
                                   top5,
                                   prefix="Epoch: [{}]".format(epoch))
    cur_step = epoch * len(train_queue)
    writer.add_scalar('train/lr', lr, cur_step)

    model.train()
    end = time.time()
    for step, (input, target) in enumerate(train_queue):

        # measure data loading time
        data_time.update(time.time() - end)

        net_entropy = model.module.entropy()
        entropy.update(net_entropy.data.item() / arch_param_num, 1)

        # sample random path
        model.module.reset_binary_gates()
        # close unused module
        model.module.unused_modules_off()

        n = input.size(0)
        input = Variable(input, requires_grad=False).cuda()
        # target = Variable(target, requires_grad=False).cuda(async=True)
        target = Variable(target, requires_grad=False).cuda()

        logits = model(input)
        if config.label_smooth > 0.0:
            loss = utils.cross_entropy_with_label_smoothing(
                logits, target, config.label_smooth)
        else:
            loss = criterion(logits, target)

        acc1, acc5 = utils.accuracy(logits, target, topk=(1, 5))

        losses.update(loss, n)
        top1.update(acc1, n)
        top5.update(acc5, n)
        model.zero_grad()

        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), config.grad_clip)
        optimizer.step()
        # unused module back
        model.module.unused_modules_back()

        # Train the weights first; after a few epochs, also update the architecture parameters
        if epoch > 0:
            #### official warm-up lr (disabled) ####
            # T_cur = epoch * len(train_queue) + step
            # lr_max = 0.05
            # T_total = config.warmup_epochs * len(train_queue)
            # lr = 0.5 * lr_max * (1 + math.cos(math.pi * T_cur / T_total))
            #### official warm-up lr (disabled) ####
            for j in range(update_schedule.get(step, 0)):
                model.train()
                latency_loss = 0
                expected_loss = 0

                valid_iter = iter(valid_queue)
                input_valid, target_valid = next(valid_iter)
                # alpha_optimizer.zero_grad()
                input_valid = Variable(input_valid, requires_grad=False).cuda()
                # target = Variable(target, requires_grad=False).cuda(async=True)
                target_valid = Variable(target_valid,
                                        requires_grad=False).cuda()
                model.module.reset_binary_gates()
                model.module.unused_modules_off()
                output_valid = model(input_valid).float()
                loss_ce = criterion(output_valid, target_valid)
                expected_loss = LatencyLoss.expected_latency(model)
                expected_loss_tensor = torch.cuda.FloatTensor([expected_loss])
                latency_loss = LatencyLoss(loss_ce, expected_loss_tensor,
                                           config)
                # compute gradient and do SGD step
                # zero grads of weight_param, arch_param & binary_param
                model.zero_grad()
                latency_loss.backward()
                # set architecture parameter gradients
                model.module.set_arch_param_grad()
                alpha_optimizer.step()
                model.module.rescale_updated_arch_param()
                model.module.unused_modules_back()
                log_str = 'Architecture [%d-%d]\t Loss %.4f\t %s LatencyLoss: %s' % (
                    epoch, step, latency_loss, config.target_hardware,
                    expected_loss)
                utils.write_log(arch_logger_path, log_str)

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if step % config.print_freq == 0 or step == len(train_queue) - 1:
            logger.info('train step:%03d %03d  loss:%e top1:%05f top5:%05f',
                        step, len(train_queue), losses.avg, top1.avg, top5.avg)
            progress.print(step)
    writer.add_scalar('train/loss', losses.avg, cur_step)
    writer.add_scalar('train/top1', top1.avg, cur_step)
    writer.add_scalar('train/top5', top5.avg, cur_step)

    return top1.avg, losses.avg
Example #11
def warm_up(train_queue, valid_queue, model, criterion, Latency, optimizer,
            epoch, writer):
    batch_time = utils.AverageMeters('Time', ':6.3f')
    data_time = utils.AverageMeters('Data', ':6.3f')
    losses = utils.AverageMeters('Loss', ':.4e')
    top1 = utils.AverageMeters('Acc@1', ':6.2f')
    top5 = utils.AverageMeters('Acc@5', ':6.2f')
    progress = utils.ProgressMeter(len(train_queue),
                                   batch_time,
                                   data_time,
                                   losses,
                                   top1,
                                   top5,
                                   prefix="Epoch: [{}]".format(epoch))
    cur_step = epoch * len(train_queue)
    model.train()
    print('\n', '-' * 30, 'Warmup epoch: %d' % (epoch), '-' * 30, '\n')
    end = time.time()
    lr = 0
    for step, (input, target) in enumerate(train_queue):
        # measure data loading time
        data_time.update(time.time() - end)
        # official warm-up lr schedule
        T_cur = epoch * len(train_queue) + step
        lr_max = 0.05
        T_total = config.warmup_epochs * len(train_queue)
        lr = 0.5 * lr_max * (1 + math.cos(math.pi * T_cur / T_total))
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr
        writer.add_scalar('warm-up/lr', lr, cur_step + step)

        #### official warm-up lr ####

        n = input.size(0)
        input = Variable(input, requires_grad=False).cuda()
        # target = Variable(target, requires_grad=False).cuda(async=True)
        target = Variable(target, requires_grad=False).cuda()

        model.module.reset_binary_gates()
        model.module.unused_modules_off()

        logits = model(input)
        if config.label_smooth > 0 and epoch > config.warmup_epochs:
            loss = utils.cross_entropy_with_label_smoothing(
                logits, target, config.label_smooth)
        else:
            loss = criterion(logits, target)
        model.zero_grad()
        loss.backward()
        optimizer.step()

        acc1, acc5 = utils.accuracy(logits, target, topk=(1, 5))
        losses.update(loss, n)
        top1.update(acc1, n)
        top5.update(acc5, n)

        # unused modules back
        model.module.unused_modules_back()
        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if step % config.print_freq == 0 or step == len(train_queue) - 1:
            logger.info(
                'warmup train step:%03d %03d  loss:%e top1:%05f top5:%05f',
                step, len(train_queue), losses.avg, top1.avg, top5.avg)
            progress.print(step)
        writer.add_scalar('warmup-train/loss', losses.avg, cur_step)
        writer.add_scalar('warmup-train/top1', top1.avg, cur_step)
        writer.add_scalar('warmup-train/top5', top5.avg, cur_step)

    logger.info('warmup epoch %d lr %e', epoch, lr)
    # set chosen op active
    model.module.set_chosen_op_active()
    # remove unused modules
    model.module.unused_modules_off()
    valid_top1, valid_top5, valid_loss = validate_warmup(
        valid_queue, model, epoch, criterion, writer)
    shape = [1, 3, 224, 224]
    input_var = torch.zeros(shape, device=device)
    flops = model.module.get_flops(input_var)
    latency = 0
    if config.target_hardware in [None, 'flops']:
        latency = 0
    else:
        latency = Latency.predict_latency(model)
    # unused modules back
    logger.info(
        'Warmup Valid [{0}/{1}]\tloss {2:.3f}\ttop-1 acc {3:.3f}\ttop-5 acc '
        '{4:.3f}\tflops: {5:.1f}M {6:.3f}ms'.format(epoch,
                                                    config.warmup_epochs,
                                                    valid_loss, valid_top1,
                                                    valid_top5, flops / 1e6,
                                                    latency))
    model.module.unused_modules_back()

    config.warmup = epoch + 1 < config.warmup_epochs
    state_dict = model.state_dict()
    # remove architecture params and binary gates
    for key in list(state_dict.keys()):
        if 'alpha' in key or 'path' in key:
            state_dict.pop(key)
    checkpoint = {
        'state_dict': state_dict,
        'warmup': config.warmup,
    }
    if config.warmup:
        checkpoint['warmup_epoch'] = epoch

    checkpoint['epoch'] = epoch
    checkpoint['w_optimizer'] = optimizer.state_dict()

    save_model(model, checkpoint, model_name='warmup.pth.tar')
    return top1.avg, losses.avg
Example #12
def validate(val_loader, model, criterion, epoch, args):
    AverageMeter = utils.AverageMeter
    batch_time = AverageMeter('Time', ':6.3f')
    losses = AverageMeter('Loss', ':f')
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    progress = utils.ProgressMeter(
        len(val_loader),
        [batch_time, losses, top1, top5],
        prefix='Test: ')

    # switch to evaluate mode
    model.eval()

    # collect output vectors and labels in lists
    dict1 = {
        'vector': [],
        'label': [],
    }
    pth_file_name = os.path.join(args.train_local, 'epoch_%s.pt'
                                        % (str(epoch + 1)))


    with torch.no_grad():
        end = time.time()
        for i, (images, target) in enumerate(val_loader):
            if args.gpu is not None:
                images = images.cuda(args.gpu, non_blocking=True)
            target = target.cuda(args.gpu, non_blocking=True)

            # compute output
            output = model(images)
            loss = criterion(output, target)

            # measure accuracy and record loss
            acc1, acc5 = utils.accuracy(output, target, topk=(1, 5))

            if args.save_vector:
                # append to the lists
                dict1['vector'].append(output)
                dict1['label'].append(target)

            losses.update(loss.item(), images.size(0))
            top1.update(acc1[0], images.size(0))
            top5.update(acc5[0], images.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % args.print_freq == 0:
                progress.display(i)

        # TODO: this should also be done with the ProgressMeter
        print(' * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}'
              .format(top1=top1, top5=top5))
    # save to file (disabled)
    # if args.save_vector:
    #     torch.save(dict1, pth_file_name)
    #     # np.save(pth_file_name,dict1)
    #     if args.train_url.startswith('s3'):
    #             mox.file.copy(pth_file_name,
    #                         args.train_url + '/' + os.path.basename(pth_file_name))
    #             os.remove(pth_file_name)
    if args.save_vector:
        return top1.avg, losses.avg, dict1
    return top1.avg, losses.avg
Example #13
    def run(self, data_loader, prefix, epoch, metrics_calc):
        batch_time_meter = utils.AverageMeter('Time')
        data_time_meter = utils.AverageMeter('Data')
        other_meters = []

        progress_display_made = False
        start_time = time.time()

        for i, batch in enumerate(data_loader):
            batch_number = epoch * len(data_loader) + i + 1
            data_time_meter.update(time.time() - start_time,
                                   n=self.get_batch_size(batch))

            # if batch_number % constants.INTERMITTENT_OUTPUT_FREQ == 0:
            #    self.intermittent_introspection(batch, batch_number)

            # transfer from CPU -> GPU asynchronously if at all
            if torch.cuda.is_available():
                if not isinstance(batch, (list, dict)):
                    batch = batch.cuda(non_blocking=True)
                elif isinstance(batch, list):
                    for j in range(len(batch)):
                        batch[j] = batch[j].cuda(non_blocking=True)
                else:  # isinstance(batch, dict)
                    for key in batch.keys():
                        if self.keys_for_gpu is None or key in self.keys_for_gpu:
                            batch[key] = batch[key].cuda(non_blocking=True)

            metrics = metrics_calc(batch)
            # loss.backward is called in metrics_calc
            if metrics is not None:
                for j, (metric_name, metric_val) in enumerate(metrics):
                    self.writer.add_scalar(
                        os.path.join(self.name, prefix + '_' + metric_name),
                        metric_val, self.global_step)

                    if not progress_display_made:
                        other_meters.append(utils.AverageMeter(metric_name))
                    other_meters[j].update(metric_val,
                                           n=self.get_batch_size(batch))

                self.global_step += 1

                if not progress_display_made:
                    progress = utils.ProgressMeter(len(data_loader), other_meters + \
                        [batch_time_meter, data_time_meter], prefix=prefix)
                    progress_display_made = True
            elif not progress_display_made:
                progress = utils.ProgressMeter(
                    len(data_loader), [batch_time_meter, data_time_meter],
                    prefix=prefix)

            batch_time_meter.update(time.time() - start_time,
                                    n=self.get_batch_size(batch))
            start_time = time.time()

            if i % constants.PRINT_FREQ == 0:
                progress.display(i + 1, epoch)

        if i % constants.PRINT_FREQ != 0:
            progress.display(i + 1, epoch)
Example #14
def validate(val_loader, model, epoch, criterion, config, early_stopping,
             writer, start):
    batch_time = utils.AverageMeters('Time', ':6.3f')
    losses = utils.AverageMeters('Loss', ':.4e')
    top1 = utils.AverageMeters('Acc@1', ':6.2f')
    top5 = utils.AverageMeters('Acc@5', ':6.2f')
    if 'DALIClassificationIterator' in val_loader.__class__.__name__:
        progress = utils.ProgressMeter(math.ceil(val_loader._size /
                                                 config.batch_size),
                                       batch_time,
                                       losses,
                                       top1,
                                       top5,
                                       prefix='Test: ')
    else:
        progress = utils.ProgressMeter(len(val_loader),
                                       batch_time,
                                       losses,
                                       top1,
                                       top5,
                                       prefix='Test: ')
    # switch to evaluate mode
    model.eval()

    with torch.no_grad():
        end = time.time()
        if 'DALIClassificationIterator' in val_loader.__class__.__name__:
            for i, data in enumerate(val_loader):
                images = Variable(data[0]['data'])
                target = Variable(
                    data[0]['label'].squeeze().long().cuda(non_blocking=True))

                # compute output
                output = model(images)
                loss = criterion(output, target)

                # measure accuracy and record loss
                acc1, acc5 = utils.accuracy(output, target, topk=(1, 5))
                if config.distributed:
                    reduced_loss = reduce_tensor(loss.data,
                                                 world_size=config.world_size)
                    acc1 = reduce_tensor(acc1, world_size=config.world_size)
                    acc5 = reduce_tensor(acc5, world_size=config.world_size)
                else:
                    reduced_loss = loss.data
                losses.update(to_python_float(reduced_loss), images.size(0))
                top1.update(to_python_float(acc1), images.size(0))
                top5.update(to_python_float(acc5), images.size(0))

                # measure elapsed time
                batch_time.update(time.time() - end)
                end = time.time()

                if i % config.print_freq == 0:
                    progress.print(i)
        else:
            for i, (images, target) in enumerate(val_loader):
                images = images.cuda(device, non_blocking=True)
                target = target.cuda(device, non_blocking=True)

                # compute output
                output = model(images)
                loss = criterion(output, target)

                # measure accuracy and record loss
                acc1, acc5 = utils.accuracy(output, target, topk=(1, 5))
                losses.update(loss.item(), images.size(0))
                top1.update(acc1[0], images.size(0))
                top5.update(acc5[0], images.size(0))

                # measure elapsed time
                batch_time.update(time.time() - end)
                end = time.time()

                if i % config.print_freq == 0:
                    progress.print(i)
        # TODO: this should also be done with the ProgressMeter
        print(' * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}'.format(top1=top1,
                                                                    top5=top5))

        early_stopping(losses.avg, model, ckpt_dir=config.path)
        if early_stopping.early_stop:
            print("Early stopping")
            utils.time(time.time() - start)
            os._exit(0)
        writer.add_scalar('val/loss', losses.avg, epoch)
        writer.add_scalar('val/top1', top1.val, epoch)
        writer.add_scalar('val/top5', top5.val, epoch)
    return top1.avg
Example #15
def train(train_loader, model, criterion, optimizer, epoch, config, writer):
    utils.adjust_learning_rate(optimizer, epoch, config)
    batch_time = utils.AverageMeters('Time', ':6.3f')
    data_time = utils.AverageMeters('Data', ':6.3f')
    losses = utils.AverageMeters('Loss', ':.4e')
    top1 = utils.AverageMeters('Acc@1', ':6.2f')
    top5 = utils.AverageMeters('Acc@5', ':6.2f')
    if 'DALIClassificationIterator' in train_loader.__class__.__name__:
        # TODO: check whether this should be multiplied by config.world_size
        progress = utils.ProgressMeter(math.ceil(train_loader._size /
                                                 config.batch_size),
                                       batch_time,
                                       data_time,
                                       losses,
                                       top1,
                                       top5,
                                       prefix="Epoch: [{}]".format(epoch))
        cur_step = epoch * math.ceil(train_loader._size / config.batch_size)
    else:
        progress = utils.ProgressMeter(len(train_loader),
                                       batch_time,
                                       data_time,
                                       losses,
                                       top1,
                                       top5,
                                       prefix="Epoch: [{}]".format(epoch))

        cur_step = epoch * len(train_loader)
    writer.add_scalar('train/lr', config.lr, cur_step)

    model.train()

    end = time.time()
    if 'DALIClassificationIterator' in train_loader.__class__.__name__:
        for i, data in enumerate(train_loader):
            # measure data loading time
            data_time.update(time.time() - end)

            images = Variable(data[0]['data'])
            target = Variable(data[0]['label'].squeeze().cuda().long())

            # compute output
            output = model(images)
            loss = criterion(output, target)

            # measure accuracy and record loss
            acc1, acc5 = utils.accuracy(output, target, topk=(1, 5))

            if config.distributed:
                reduced_loss = reduce_tensor(loss.data,
                                             world_size=config.world_size)
                acc1 = reduce_tensor(acc1, world_size=config.world_size)
                acc5 = reduce_tensor(acc5, world_size=config.world_size)
            else:
                reduced_loss = loss.data
            losses.update(to_python_float(reduced_loss), images.size(0))
            top1.update(to_python_float(acc1), images.size(0))
            top5.update(to_python_float(acc5), images.size(0))

            # compute gradient and do SGD step
            optimizer.zero_grad()
            if config.fp16_allreduce:
                optimizer.backward(loss)
            else:
                loss.backward()
            optimizer.step()
            torch.cuda.synchronize()

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()
            if i % config.print_freq == 0:
                progress.print(i)
            writer.add_scalar('train/loss', loss.item(), cur_step)
            writer.add_scalar('train/top1', top1.avg, cur_step)
            writer.add_scalar('train/top5', top5.avg, cur_step)
    else:
        for i, (images, target) in enumerate(train_loader):
            # measure data loading time
            data_time.update(time.time() - end)

            images = images.cuda(device, non_blocking=True)
            target = target.cuda(device, non_blocking=True)

            # compute output
            output = model(images)
            loss = criterion(output, target)

            # measure accuracy and record loss
            acc1, acc5 = utils.accuracy(output, target, topk=(1, 5))
            losses.update(loss.item(), images.size(0))
            top1.update(acc1.item(), images.size(0))
            top5.update(acc5.item(), images.size(0))

            # compute gradient and do SGD step
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()
            if i % config.print_freq == 0:
                progress.print(i)

            writer.add_scalar('train/loss', loss.item(), cur_step)
            writer.add_scalar('train/top1', top1.avg, cur_step)
            writer.add_scalar('train/top5', top5.avg, cur_step)