Example #1
def evaluate(args):
  assert torch.cuda.is_available(), 'CUDA is not available.'
  torch.backends.cudnn.enabled   = True
  torch.backends.cudnn.benchmark = True

  print ('The image is {:}'.format(args.image))
  print ('The model is {:}'.format(args.model))
  snapshot = Path(args.model)
  assert snapshot.exists(), 'The model path {:} does not exist'.format(snapshot)
  print ('The face bounding box is {:}'.format(args.face))
  assert len(args.face) == 4, 'Invalid face input : {:}'.format(args.face)
  snapshot = torch.load(snapshot)

  mean_fill   = tuple( [int(x*255) for x in [0.485, 0.456, 0.406] ] )
  normalize   = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                      std=[0.229, 0.224, 0.225])
  param = snapshot['args']
  eval_transform  = transforms.Compose([transforms.PreCrop(param.pre_crop_expand), transforms.TrainScale2WH((param.crop_width, param.crop_height)),  transforms.ToTensor(), normalize])

  net = models.__dict__[param.arch](param.modelconfig, None)

  net = net.cuda()
  weights = models.remove_module_dict(snapshot['state_dict'])
  net.load_state_dict(weights)

  dataset = datasets.GeneralDataset(eval_transform, param.sigma, param.downsample, param.heatmap_type, param.dataset_name)
  dataset.reset(param.num_pts)

  print ('[{:}] prepare the input data'.format(time_string()))
  [image, _, _, _, _, _, cropped_size], meta = dataset.prepare_input(args.image, args.face)
  inputs = image.unsqueeze(0).cuda()
  print ('[{:}] prepare the input data done'.format(time_string()))
  # network forward
  with torch.no_grad():
    batch_heatmaps, batch_locs, batch_scos, _ = net(inputs)
  print ('[{:}] the network forward done'.format(time_string()))

  # obtain the locations on the image in the original size
  cpu = torch.device('cpu')
  np_batch_locs, np_batch_scos, cropped_size = batch_locs.to(cpu).numpy(), batch_scos.to(cpu).numpy(), cropped_size.numpy()
  locations, scores = np_batch_locs[0,:-1,:], np.expand_dims(np_batch_scos[0,:-1], -1)

  scale_h, scale_w = cropped_size[0] * 1. / inputs.size(-2) , cropped_size[1] * 1. / inputs.size(-1)

  locations[:, 0], locations[:, 1] = locations[:, 0] * scale_w + cropped_size[2], locations[:, 1] * scale_h + cropped_size[3]
  prediction = np.concatenate((locations, scores), axis=1).transpose(1,0)
  for i in range(param.num_pts):
    point = prediction[:, i]
    print ('{:02d}/{:02d} : ({:.1f}, {:.1f}), score = {:.3f}'.format(i, param.num_pts, float(point[0]), float(point[1]), float(point[2])))
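Example #1 assumes a models.remove_module_dict helper for loading weights saved from an nn.DataParallel model; a minimal sketch of what such a helper typically does (an assumption, not necessarily the project's exact code):

def remove_module_dict(state_dict):
    # strip the 'module.' prefix that nn.DataParallel adds to every parameter name
    return {key[len('module.'):] if key.startswith('module.') else key: value
            for key, value in state_dict.items()}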
Example #2
def train(train_loader, train_loader1, model, criterion, optimizer, var_optimizer, epoch, args, log):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    rk_losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    train_loader1_iter = iter(train_loader1)

    model.train()

    end = time.time()
    for i, (input, target) in enumerate(train_loader):
        data_time.update(time.time() - end)

        input = input.cuda(args.gpu, non_blocking=True)
        target = target.cuda(args.gpu, non_blocking=True)


        input1 = next(train_loader1_iter)
        input1 = input1.cuda(args.gpu, non_blocking=True)

        bs = input.shape[0]
        bs1 = input1.shape[0]

        output = model(torch.cat([input, input1.repeat(2, 1, 1, 1)]))
        loss = criterion(output[:bs], target)

        out1_0 = output[bs:bs+bs1].softmax(-1)
        out1_1 = output[bs+bs1:].softmax(-1)
        mi1 = ent((out1_0 + out1_1)/2.) - (ent(out1_0) + ent(out1_1))/2.
        rank_loss = torch.nn.functional.relu(args.mi_th - mi1).mean()

        prec1, prec5 = accuracy(output[:bs], target, topk=(1, 5))
        losses.update(loss.detach().item(), bs)
        rk_losses.update(rank_loss.detach().item(), bs1)
        top1.update(prec1.item(), bs)
        top5.update(prec5.item(), bs)

        optimizer.zero_grad()
        var_optimizer.zero_grad()
        (loss+rank_loss*args.alpha).backward()
        optimizer.step()
        var_optimizer.step()

        batch_time.update(time.time() - end)
        end = time.time()

        if i == len(train_loader) - 1:
            print_log('  Epoch: [{:03d}][{:03d}/{:03d}]   '
                        'Time {batch_time.avg:.3f}   '
                        'Data {data_time.avg:.3f}   '
                        'Loss {loss.avg:.4f}   '
                        'RK Loss {rk_loss.avg:.4f}   '
                        'Prec@1 {top1.avg:.3f}   '
                        'Prec@5 {top5.avg:.3f}   '.format(
                        epoch, i, len(train_loader), batch_time=batch_time, rk_loss=rk_losses,
                        data_time=data_time, loss=losses, top1=top1, top5=top5) + time_string(), log)
    return top1.avg, losses.avg
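Example #2 relies on an ent helper for the entropy of a softmax distribution (used in the mutual-information term mi1); a minimal sketch under that assumption:

def ent(p):
    # entropy of a categorical distribution given as probabilities along the last dim
    return -(p * p.clamp_min(1e-8).log()).sum(-1)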
def train(train_loader, model, criterion, optimizer, epoch, log):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # switch to train mode
    model.train()

    end = time.time()
    for i, (input, target) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        if args.use_cuda:
            target = target.cuda(
                non_blocking=True
            )  # the copy will be asynchronous with respect to the host.
            input = input.cuda()

        # compute output
        output = model(input)
        loss = criterion(output, target)

        # measure accuracy and record loss
        prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
        losses.update(loss.item(), input.size(0))
        top1.update(prec1.item(), input.size(0))
        top5.update(prec5.item(), input.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            print_log(
                '  Epoch: [{:03d}][{:03d}/{:03d}]   '
                'Time {batch_time.val:.3f} ({batch_time.avg:.3f})   '
                'Data {data_time.val:.3f} ({data_time.avg:.3f})   '
                'Loss {loss.val:.4f} ({loss.avg:.4f})   '
                'Prec@1 {top1.val:.3f} ({top1.avg:.3f})   '
                'Prec@5 {top5.val:.3f} ({top5.avg:.3f})   '.format(
                    epoch,
                    i,
                    len(train_loader),
                    batch_time=batch_time,
                    data_time=data_time,
                    loss=losses,
                    top1=top1,
                    top5=top5) + time_string(), log)
    print_log(
        '  **Train** Prec@1 {top1.avg:.3f} Prec@5 {top5.avg:.3f} Error@1 {error1:.3f}'
        .format(top1=top1, top5=top5, error1=100 - top1.avg), log)
    return top1.avg, losses.avg
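Most of the training loops above and below assume the same small utilities (AverageMeter, accuracy, print_log); a minimal sketch of them, assuming the conventional ImageNet-example semantics rather than any one project's exact code:

class AverageMeter(object):
    """Keeps a running value, sum, count and average."""
    def __init__(self):
        self.val, self.sum, self.count, self.avg = 0.0, 0.0, 0, 0.0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count


def accuracy(output, target, topk=(1,)):
    """Top-k precision (in percent); returns one 1-element tensor per k."""
    maxk = max(topk)
    batch_size = target.size(0)
    _, pred = output.topk(maxk, 1, True, True)
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))
    res = []
    for k in topk:
        correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
        res.append(correct_k.mul_(100.0 / batch_size))
    return res


def print_log(message, log):
    print(message)
    log.write('{:}\n'.format(message))
    log.flush()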
Example #4
def train(train_loader, model, criterion, optimizer, epoch, log):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    # switch to train mode
    model.train()

    end = time.time()
    for i, (input, target) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        if args.use_cuda:
            target = target.cuda(non_blocking=True)
            input = input.cuda()
        input_var = torch.autograd.Variable(input)
        target_var = torch.autograd.Variable(target)

        # compute output
        output = model(input_var)
        loss = criterion(output, target_var)

        # measure accuracy and record loss
        prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
        losses.update(loss.item(), input.size(0))
        top1.update(prec1.item(), input.size(0))
        top5.update(prec5.item(), input.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

    print_log(
        "  Epoch: [{:03d}][{:03d}/{:03d}]   "
        "Time {batch_time.val:.3f} ({batch_time.avg:.3f})   "
        "Data {data_time.val:.3f} ({data_time.avg:.3f})   "
        "Loss {loss.val:.4f} ({loss.avg:.4f})   "
        "Prec@1 {top1.val:.3f} ({top1.avg:.3f})   "
        "Prec@5 {top5.val:.3f} ({top5.avg:.3f})   ".format(
            epoch,
            i,
            len(train_loader),
            batch_time=batch_time,
            data_time=data_time,
            loss=losses,
            top1=top1,
            top5=top5,
        )
        + time_string(),
        log,
    )
    return top1.avg, losses.avg
def train(train_loader, model, criterion, optimizer, epoch, log):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    # switch to train mode
    model.train()

    end = time.time()
    for i, (input, target) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        if args.use_cuda:
            target = target.cuda(non_blocking=True)
            input = input.cuda()
        input_var = torch.autograd.Variable(input)
        target_var = torch.autograd.Variable(target)

        # compute output
        output = model(input_var)
        loss = criterion(output, target_var)

        # measure accuracy and record loss
        prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
        losses.update(loss.item(), input.size(0))
        top1.update(prec1.item(), input.size(0))
        top5.update(prec5.item(), input.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()

        for k, m in enumerate(model.modules()):
            if isinstance(m, nn.Conv2d):
                weight_copy = m.weight.data.abs().clone()
                mask = weight_copy.gt(0).float().cuda()
                m.weight.grad.data.mul_(mask)

        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            print_log('  Epoch: [{:03d}][{:03d}/{:03d}]   '
                        'Time {batch_time.val:.3f} ({batch_time.avg:.3f})   '
                        'Data {data_time.val:.3f} ({data_time.avg:.3f})   '
                        'Loss {loss.val:.4f} ({loss.avg:.4f})   '
                        'Prec@1 {top1.val:.3f} ({top1.avg:.3f})   '
                        'Prec@5 {top5.val:.3f} ({top5.avg:.3f})   '.format(
                        epoch, i, len(train_loader), batch_time=batch_time,
                        data_time=data_time, loss=losses, top1=top1, top5=top5) + time_string(), log)
    print_log('  **Train** Prec@1 {top1.avg:.3f} Prec@5 {top5.avg:.3f} Error@1 {error1:.3f}'.format(top1=top1, top5=top5, error1=100-top1.avg), log)
    return top1.avg, losses.avg
Example #6
def train(lang_1,
          lang_2,
          pairs,
          encoder,
          decoder,
          output_dir,
          n_epochs=500000,
          learning_rate=0.001,
          print_every=1000,
          save_every=5000,
          debug=False):

    LOGGER.info('Starting training process...')

    save_every_epoch_start = time.time()

    for epoch in range(1, n_epochs + 1):

        start = time.time()

        LOGGER.debug('Start training epoch %i at %s' % (epoch, time_string()))

        # Train the particular iteration
        train_iter(lang_1,
                   lang_2,
                   pairs,
                   encoder,
                   decoder,
                   len(pairs),
                   print_every=print_every,
                   learning_rate=learning_rate)

        LOGGER.debug('Finished training epoch %i at %s' %
                     (epoch, time_string()))
        LOGGER.debug('Time taken for epoch %i = %s' %
                     (epoch, time_since(start, epoch / n_epochs)))

        if epoch % save_every == 0:
            LOGGER.info('Saving model at epoch %i...' % epoch)
            LOGGER.info('Time taken for %i epochs = %s' %
                        (save_every,
                         time_since(save_every_epoch_start, epoch / n_epochs)))
            save_models(encoder, decoder, learning_rate, epoch, output_dir)
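Example #6 assumes time_since(start, fraction_done) style timing helpers; a sketch in the spirit of the standard PyTorch seq2seq tutorial (an assumption about their exact behavior):

import math
import time

def as_minutes(seconds):
    minutes = math.floor(seconds / 60)
    return '%dm %ds' % (minutes, seconds - minutes * 60)

def time_since(since, fraction_done):
    # elapsed time plus an estimate of the remaining time given the completed fraction
    elapsed = time.time() - since
    estimated_total = elapsed / fraction_done
    return '%s (- %s)' % (as_minutes(elapsed), as_minutes(estimated_total - elapsed))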
Example #7
File: train.py  Project: ZLKong/Pruning
def train(train_loader, model, criterion, optimizer, epoch, log):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    # switch to train mode
    model.train()

    end = time.time()
    for i, (input, target) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        if args.use_cuda:
            target = target.cuda(non_blocking=True)
            input = input.cuda()
        input_var = torch.autograd.Variable(input)
        target_var = torch.autograd.Variable(target)

        output = model(input_var)
        loss = criterion(output, target_var)

        prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
        losses.update(loss.data.item(), input.size(0))
        top1.update(prec1.item(), input.size(0))
        top5.update(prec5.item(), input.size(0))

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            print('  Epoch: [{:03d}][{:03d}/{:03d}]   '
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})   '
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})   '
                  'Loss {loss.val:.4f} ({loss.avg:.4f})   '
                  'Prec@1 {top1.val:.3f} ({top1.avg:.3f})   '
                  'Prec@5 {top5.val:.3f} ({top5.avg:.3f})   '.format(
                      epoch,
                      i,
                      len(train_loader),
                      batch_time=batch_time,
                      data_time=data_time,
                      loss=losses,
                      top1=top1,
                      top5=top5) + time_string())
    print(
        '  **Train** Prec@1 {top1.avg:.3f} Prec@5 {top5.avg:.3f} Error@1 {error1:.3f}'
        .format(top1=top1, top5=top5, error1=100 - top1.avg))
    return top1.avg, losses.avg
Example #8
    async def timer(self, ctx):
        """Prints out the timer"""

        time_remaining = gamedata.GAME_LENGTH
        while time_remaining > 0:
            time_remaining -= ctx.game.timer_gap

            if ctx.game.show_timer:
                time = utils.time_string(time_remaining)
                await ctx.send(time)
            await asyncio.sleep(ctx.game.timer_gap / ctx.game.game_speed)
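Example #8 passes a number of remaining seconds to utils.time_string, unlike the timestamp-style time_string() in the other examples; a plausible, hypothetical formatter for that use:

def time_string(seconds):
    # format a duration in seconds as MM:SS
    minutes, secs = divmod(int(seconds), 60)
    return '{:02d}:{:02d}'.format(minutes, secs)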
Example #9
def extract_eval_dataset(backbone, mode, extractors, all_test_datasets,
                         test_loader, num_iters, logger, save_dir):
    # dataset_models = DATASET_MODELS_DICT[backbone]

    logger.print('\n{:} starting to extract the {:} mode into {:} by {:} iters.'.format(
        time_string(), mode, save_dir, num_iters))
    config = tf.compat.v1.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.compat.v1.Session(config=config) as session:
        for idata, test_dataset in enumerate(all_test_datasets):
            logger.print('===>>> {:} --->>> {:02d}/{:02d} --->>> {:}'.format(
                time_string(), idata, len(all_test_datasets), test_dataset))
            x_save_dir = save_dir / '{:}-{:}'.format(
                mode, num_iters) / '{:}'.format(test_dataset)
            x_save_dir.mkdir(parents=True, exist_ok=True)
            for idx in tqdm(range(num_iters)):
                # extract image features and labels
                if mode == "val":
                    sample = test_loader.get_validation_task(
                        session, test_dataset)
                elif mode == "test":
                    sample = test_loader.get_test_task(session, test_dataset)
                else:
                    raise ValueError("invalid mode:{}".format(mode))

                with torch.no_grad():
                    context_labels = sample['context_labels']
                    target_labels = sample['target_labels']
                    # batch x #extractors x #features
                    context_features = extract_features(
                        extractors, sample['context_images'])
                    target_features = extract_features(extractors,
                                                       sample['target_images'])
                    to_save_info = {
                        'context_features': context_features.cpu(),
                        'context_labels': context_labels.cpu(),
                        'target_features': target_features.cpu(),
                        'target_labels': target_labels.cpu()
                    }
                    save_name = x_save_dir / '{:06d}.pth'.format(idx)
                    torch.save(to_save_info, save_name)
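Example #9's comment says extract_features returns a batch x #extractors x #features tensor; a minimal sketch consistent with that comment (the dict layout of extractors is an assumption):

import torch

def extract_features(extractors, images):
    # run each frozen backbone on the same images and stack the per-extractor feature vectors
    feats = [extractor(images) for extractor in extractors.values()]
    return torch.stack(feats, dim=1)  # batch x #extractors x #features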
Example #10
def train(train_loader, model, criterion, optimizer, var_optimizer, epoch,
          args, log):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    model.train()
    end = time.time()
    for i, (input, target) in enumerate(train_loader):
        data_time.update(time.time() - end)

        input = input.cuda(args.gpu, non_blocking=True)
        target = target.cuda(args.gpu, non_blocking=True)

        if epoch < 5:
            warmup_learning_rate(optimizer, var_optimizer, epoch, i,
                                 len(train_loader), args)

        output = model(input)
        loss = criterion(output, target)

        prec1, prec5 = accuracy(output, target, topk=(1, 5))
        losses.update(loss.detach().item(), input.size(0))
        top1.update(prec1.item(), input.size(0))
        top5.update(prec5.item(), input.size(0))
        # print_log(loss.item(), log)

        optimizer.zero_grad()
        var_optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        var_optimizer.step()

        batch_time.update(time.time() - end)
        end = time.time()

    print_log(
        '  Epoch: [{:03d}]   '
        'Time {batch_time.avg:.3f}   '
        'Data {data_time.avg:.3f}   '
        'Loss {loss.avg:.4f}   '
        'Prec@1 {top1.avg:.3f}   '
        'Prec@5 {top5.avg:.3f}   '.format(epoch,
                                          batch_time=batch_time,
                                          data_time=data_time,
                                          loss=losses,
                                          top1=top1,
                                          top5=top5) + time_string(), log)
    return top1.avg, losses.avg
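Example #10 calls a warmup_learning_rate helper during the first five epochs; a minimal sketch of a linear warm-up consistent with that call (args.lr and the exact schedule are assumptions):

def warmup_learning_rate(optimizer, var_optimizer, epoch, step, steps_per_epoch, args):
    # linearly ramp the learning rate from 0 to args.lr over the first 5 epochs
    progress = (epoch * steps_per_epoch + step + 1) / float(5 * steps_per_epoch)
    lr = args.lr * min(1.0, progress)
    for opt in (optimizer, var_optimizer):
        for group in opt.param_groups:
            group['lr'] = lr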
Example #11
def train(train_loader, model, criterion, kfac, args, log):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    model.train()

    end = time.time()
    for i, (input, target) in enumerate(train_loader):
        # print(i)
        # if i == 10:
        #     break
        data_time.update(time.time() - end)

        input = input.cuda(args.gpu, non_blocking=True)
        target = target.cuda(args.gpu, non_blocking=True)

        output = model(input)
        dist = torch.distributions.Categorical(logits=output)
        sampled_labels = dist.sample()
        loss = criterion(output, sampled_labels)

        prec1, prec5 = accuracy(output, target, topk=(1, 5))
        losses.update(loss.detach().item(), input.size(0))
        top1.update(prec1.item(), input.size(0))
        top5.update(prec5.item(), input.size(0))

        model.zero_grad()
        loss.backward()
        kfac.update(batch_size=input.size(0))

        batch_time.update(time.time() - end)
        end = time.time()

        if i == len(train_loader) - 1:
            print_log(
                'Time {batch_time.avg:.3f}   '
                'Data {data_time.avg:.3f}   '
                'Loss {loss.avg:.4f}   '
                'Prec@1 {top1.avg:.3f}   '
                'Prec@5 {top5.avg:.3f}   '.format(batch_time=batch_time,
                                                  data_time=data_time,
                                                  loss=losses,
                                                  top1=top1,
                                                  top5=top5) + time_string(),
                log)
    return top1.avg, losses.avg
Example #12
def train(train_loader, model, criterion, optimizer, epoch, log):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    model.train()

    end = time.time()
    for i, (input, target) in enumerate(train_loader):
        data_time.update(time.time() - end)
        target = target.cuda(non_blocking=True)

        output = model(input)
        loss = criterion(output, target)

        prec1, prec5 = accuracy(output, target, topk=(1, 5))
        losses.update(loss.item(), input.size(0))
        top1.update(prec1.item(), input.size(0))
        top5.update(prec5.item(), input.size(0))

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            print_log(
                '  Epoch: [{:03d}][{:03d}/{:03d}]   '
                'Time {batch_time.val:.3f} ({batch_time.avg:.3f})   '
                'Data {data_time.val:.3f} ({data_time.avg:.3f})   '
                'Loss {loss.val:.4f} ({loss.avg:.4f})   '
                'Prec@1 {top1.val:.3f} ({top1.avg:.3f})   '
                'Prec@5 {top5.val:.3f} ({top5.avg:.3f})   '.format(
                    epoch,
                    i,
                    len(train_loader),
                    batch_time=batch_time,
                    data_time=data_time,
                    loss=losses,
                    top1=top1,
                    top5=top5) + time_string(), log)
    print_log(
        '  **Train** Prec@1 {top1.avg:.3f} Prec@5 {top5.avg:.3f} Error@1 {error1:.3f}'
        .format(top1=top1, top5=top5, error1=100 - top1.avg), log)
    return top1.avg, losses.avg
Example #13
def train(train_loader, model, criterion, optimizer, epoch, log):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    arch_prob = []

    model.train()

    end = time.time()
    for i, (input, target) in enumerate(train_loader):
        data_time.update(time.time() - end)
        target = target.cuda(non_blocking=True)

        if args.aux:
            output, output_aux = model(input)
        else:
            output = model(input)

        loss = criterion(output, target)
        if args.aux:
            loss += args.aux_weight * criterion(output_aux, target)

        optimizer.zero_grad()
        loss.backward()
    
        prec1, prec5 = accuracy(output, target, topk=(1, 5))
        losses.update(loss.item(), input.size(0))
        top1.update(prec1.item(), input.size(0))
        top5.update(prec5.item(), input.size(0))

        optimizer.step()

        batch_time.update(time.time() - end)
        end = time.time()

        if i == len(train_loader)-1:
            print_log('  Epoch: [{:03d}][{:03d}/{:03d}]   '
                        'Time {batch_time.avg:.3f}   '
                        'Data {data_time.avg:.3f}   '
                        'Loss {loss.avg:.4f}   '
                        'Prec@1 {top1.avg:.3f}   '
                        'Prec@5 {top5.avg:.3f}   '.format(
                        epoch, i, len(train_loader), batch_time=batch_time, data_time=data_time, 
                        loss=losses, top1=top1, top5=top5) + time_string(), log)
            if len(arch_prob) > 0: print_log(np.array_repr(np.stack(arch_prob).sum(0)/50000.), log)
    return top1.avg, losses.avg
Example #14
def train(train_loader, model, criterion, optimizer, epoch, log):
  batch_time = AverageMeter()
  data_time = AverageMeter()
  losses = AverageMeter()
  top1 = AverageMeter()
  top5 = AverageMeter()
  # switch to train mode
  model.train()

  end = time.time()
  for i, (input, target) in enumerate(train_loader):
    # measure data loading time
    data_time.update(time.time() - end)

    if args.use_cuda:
      target = target.cuda(non_blocking=True)
      input = input.cuda()
    input_var = torch.autograd.Variable(input)
    target_var = torch.autograd.Variable(target)

    # compute output
    output = model(input_var)
    loss = criterion(output, target_var)

    # measure accuracy and record loss
    prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
    losses.update(loss.item(), input.size(0))
    top1.update(prec1.item(), input.size(0))
    top5.update(prec5.item(), input.size(0))

    # compute gradient and do SGD step
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # measure elapsed time
    batch_time.update(time.time() - end)
    end = time.time()

  print_log('  Epoch: [{:03d}][{:03d}/{:03d}]   '
        'Time {batch_time.val:.3f} ({batch_time.avg:.3f})   '
        'Data {data_time.val:.3f} ({data_time.avg:.3f})   '
        'Loss {loss.val:.4f} ({loss.avg:.4f})   '
        'Prec@1 {top1.val:.3f} ({top1.avg:.3f})   '
        'Prec@5 {top5.val:.3f} ({top5.avg:.3f})   '.format(
        epoch, i, len(train_loader), batch_time=batch_time,
        data_time=data_time, loss=losses, top1=top1, top5=top5) + time_string(), log)
  return top1.avg, losses.avg
Example #15
def create_summary(summary_dir, summary_off=False):
    if summary_off:
        return {
            'train': tf.summary.create_noop_writer(),
            'eval': tf.summary.create_noop_writer()
        }

    logdir = os.path.join(summary_dir, time_string())
    train_log = os.path.join(logdir, 'train')
    eval_log = os.path.join(logdir, 'eval')
    os.makedirs(train_log, exist_ok=True)
    os.makedirs(eval_log, exist_ok=True)
    return {
        'train': tf.summary.create_file_writer(train_log),
        'eval': tf.summary.create_file_writer(eval_log)
    }
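The writers returned by create_summary in Example #15 are standard TF2 summary writers; a short usage sketch:

import tensorflow as tf

writers = create_summary('./summaries')      # or create_summary('', summary_off=True) for no-op writers
with writers['train'].as_default():
    tf.summary.scalar('loss', 0.25, step=1)  # written under <summary_dir>/<time_string()>/train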
def crop_style(list_file, num_pts, save_dir):
  #style = 'Original'
  #save_dir = 'cache/{}'.format(style)
  print ('Cropping face images into {:}'.format(save_dir))
  if not osp.isdir(save_dir): os.makedirs(save_dir)
  transform  = transforms.Compose([transforms.PreCrop(0.2), transforms.TrainScale2WH((256, 256))])
  data = datasets.GeneralDataset(transform, 1, 8, 'gaussian', 'test')
  data.load_list(list_file, num_pts, True)
  #loader = torch.utils.data.DataLoader(data, batch_size=1, shuffle=False, num_workers=12, pin_memory=True)
  for i, tempx in enumerate(data):
    image = tempx[0]
    #points = tempx[3]
    basename = osp.basename(data.datas[i])
    save_name = osp.join(save_dir, basename)
    image.save(save_name)
    if i % PRINT_GAP == 0:
      print ('{:} --->>> process the {:4d}/{:4d}-th image into {:}'.format(time_string(), i, len(data), save_dir))
Example #17
def basic_eval_all(args, loaders, net, criterion, epoch_str, logger,
                   opt_config):
    args = deepcopy(args)
    logger.log('Basic-Eval-All evaluates {:} dataset'.format(len(loaders)))
    nmes = []
    for i, (loader, is_video) in enumerate(loaders):
        logger.log(
            '==>>{:}, [{:}], evaluate the {:}/{:}-th dataset [{:}] : {:}'.
            format(time_string(), epoch_str, i, len(loaders),
                   'video' if is_video else 'image', loader.dataset))
        with torch.no_grad():
            eval_loss, eval_meta = basic_eval(
                args, loader, net, criterion,
                epoch_str + "::{:}/{:}".format(i, len(loaders)), logger,
                opt_config)
        nme, _, _ = eval_meta.compute_mse(logger)
        meta_path = logger.path('meta') / 'eval-{:}-{:02d}-{:02d}.pth'.format(
            epoch_str, i, len(loaders))
        eval_meta.save(str(meta_path))
        nmes.append(nme)
    return ', '.join(['{:.1f}'.format(x) for x in nmes])
Example #18
def train(train_loader, net, criterion_CE, optimizer, epoch, recorder, logger,
          args):

    batch_time_meter = AverageMeter()
    stats = recorder.train_stats
    meters = {stat: AverageMeter() for stat in stats}

    net.train()

    end = time.time()
    for i, (imgs, labels, views) in enumerate(train_loader):
        imgs_var = torch.autograd.Variable(imgs.cuda())
        labels_var = torch.autograd.Variable(labels.cuda())

        _, predictions = net(imgs_var)

        optimizer.zero_grad()
        softmax = criterion_CE(predictions, labels_var)
        softmax.backward()
        acc = accuracy(predictions.data, labels.cuda(), topk=(1, ))
        optimizer.step()

        # update meters
        meters['acc'].update(acc[0][0], args.batch_size)
        meters['loss'].update(softmax.data.mean(), args.batch_size)

        # measure elapsed time
        batch_time_meter.update(time.time() - end)
        freq = args.batch_size / batch_time_meter.avg
        end = time.time()

        if i % args.print_freq == 0:
            logger.print_log(
                '  Epoch: [{:03d}][{:03d}/{:03d}]   Freq {:.1f}   '.format(
                    epoch, i, len(train_loader), freq) +
                create_stat_string(meters) + time_string())

    logger.print_log('  **Train**  ' + create_stat_string(meters))

    recorder.update(epoch=epoch, is_train=True, meters=meters)
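Example #18 formats its meters with create_stat_string; a hypothetical sketch of such a helper:

def create_stat_string(meters):
    # render every meter as 'name current (average)'
    return '   '.join('{:} {:.3f} ({:.3f})'.format(name, meter.val, meter.avg)
                      for name, meter in meters.items())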
def perform_attack(attacker, model, model_clean, train_loader, test_loader,
                   N_iter, log, writer):
    # Note that the attack has to be done in evaluation mode due to batch-norm.
    # see: https://discuss.pytorch.org/t/what-does-model-eval-do-for-batchnorm-layer/7146
    model.eval()
    losses = AverageMeter()
    iter_time = AverageMeter()
    attack_time = AverageMeter()

    # attempt to use the training data to conduct BFA
    for _, (data, target) in enumerate(train_loader):
        if args.use_cuda:
            target = target.cuda(non_blocking=True)
            data = data.cuda()
        # Override the target to prevent label leaking
        _, target = model(data).data.max(1)
        break

    # evaluate the test accuracy of clean model
    val_acc_top1, val_acc_top5, val_loss = validate(test_loader, model,
                                                    attacker.criterion, log)

    writer.add_scalar('attack/val_top1_acc', val_acc_top1, 0)
    writer.add_scalar('attack/val_top5_acc', val_acc_top5, 0)
    writer.add_scalar('attack/val_loss', val_loss, 0)

    print_log('k_top is set to {}'.format(args.k_top), log)
    print_log('Attack sample size is {}'.format(data.size()[0]), log)
    end = time.time()
    for i_iter in range(N_iter):
        print_log('**********************************', log)
        attacker.progressive_bit_search(model, data, target)

        # measure data loading time
        attack_time.update(time.time() - end)
        end = time.time()

        h_dist = hamming_distance(model, model_clean)

        # record the loss
        losses.update(attacker.loss_max, data.size(0))

        print_log(
            'Iteration: [{:03d}/{:03d}]   '
            'Attack Time {attack_time.val:.3f} ({attack_time.avg:.3f})  '.
            format((i_iter + 1),
                   N_iter,
                   attack_time=attack_time,
                   iter_time=iter_time) + time_string(), log)

        print_log('loss before attack: {:.4f}'.format(attacker.loss.item()),
                  log)
        print_log('loss after attack: {:.4f}'.format(attacker.loss_max), log)
        print_log('bit flips: {:.0f}'.format(attacker.bit_counter), log)
        print_log('hamming_dist: {:.0f}'.format(h_dist), log)

        writer.add_scalar('attack/bit_flip', attacker.bit_counter, i_iter + 1)
        writer.add_scalar('attack/h_dist', h_dist, i_iter + 1)
        writer.add_scalar('attack/sample_loss', losses.avg, i_iter + 1)

        # exam the BFA on entire val dataset
        val_acc_top1, val_acc_top5, val_loss = validate(
            test_loader, model, attacker.criterion, log)

        writer.add_scalar('attack/val_top1_acc', val_acc_top1, i_iter + 1)
        writer.add_scalar('attack/val_top5_acc', val_acc_top5, i_iter + 1)
        writer.add_scalar('attack/val_loss', val_loss, i_iter + 1)

        # measure elapsed time
        iter_time.update(time.time() - end)
        print_log(
            'iteration Time {iter_time.val:.3f} ({iter_time.avg:.3f})'.format(
                iter_time=iter_time), log)
        end = time.time()

    return
Example #20
def train(train_loader, model, criterion, optimizer, epoch, log):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    # switch to train mode
    model.train()

    end = time.time()
    for i, (input, target) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        if args.use_cuda:
            target = target.cuda(non_blocking=True)
            input = input.cuda()
        input_var = torch.autograd.Variable(input)
        target_var = torch.autograd.Variable(target)

        # compute output
        output = model(input_var)
        loss = criterion(output, target_var)

        # measure accuracy and record loss
        prec1, prec5 = accuracy2(output.data, target, topk=(1, 1))
        losses.update(loss.item(), input.size(0))
        top1.update(prec1.item(), input.size(0))
        top5.update(prec5.item(), input.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            print_log(
                '  Epoch: [{:03d}][{:03d}/{:03d}]   '
                'Time {batch_time.val:.3f} ({batch_time.avg:.3f})   '
                'Data {data_time.val:.3f} ({data_time.avg:.3f})   '
                'Loss {loss.val:.4f} ({loss.avg:.4f})   '
                'Prec@1 {top1.val:.3f} ({top1.avg:.3f})   '
                'Prec@5 {top5.val:.3f} ({top5.avg:.3f})   '.format(
                    epoch,
                    i,
                    len(train_loader),
                    batch_time=batch_time,
                    data_time=data_time,
                    loss=losses,
                    top1=top1,
                    top5=top5) + time_string(), log)
    print_log(
        '  **Train** Prec@1 {top1.avg:.3f} Prec@5 {top5.avg:.3f} Error@1 {error1:.3f}'
        .format(top1=top1, top5=top5, error1=100 - top1.avg), log)
    # log to TensorBoard
    if args.tensorboard:
        log_value('train_loss', losses.avg, epoch)
        log_value('train_error', top1.avg, epoch)
    return top1.avg, losses.avg
Example #21
def main():
    # Init logger
    if not os.path.isdir(args.save_path):
        os.makedirs(args.save_path)
    print('Dataset: {}'.format(args.dataset.upper()))

    if args.dataset == "seedlings" or args.dataset == "bone":
        classes, class_to_idx, num_to_class, df = GenericDataset.find_classes(
            args.data_path)
    if args.dataset == "ISIC2017":
        classes, class_to_idx, num_to_class, df = GenericDataset.find_classes_melanoma(
            args.data_path)

    df.head(3)

    args.num_classes = len(classes)
    # Init model, criterion, and optimizer
    # net = models.__dict__[args.arch](num_classes)
    # net= kmodels.simpleXX_generic(num_classes=args.num_classes, imgDim=args.imgDim)
    # net= kmodels.vggnetXX_generic(num_classes=args.num_classes,  imgDim=args.imgDim)
    # net= kmodels.vggnetXX_generic(num_classes=args.num_classes,  imgDim=args.imgDim)
    net = kmodels.dpn92(num_classes=args.num_classes)
    # print_log("=> network :\n {}".format(net), log)

    real_model_name = (type(net).__name__)
    print("=> Creating model '{}'".format(real_model_name))
    import datetime

    exp_name = datetime.datetime.now().strftime(real_model_name + '_' +
                                                args.dataset +
                                                '_%Y-%m-%d_%H-%M-%S')
    print('Training ' + real_model_name +
          ' on {} dataset:'.format(args.dataset.upper()))

    mPath = args.save_path + '/' + args.dataset + '/' + real_model_name + '/'
    args.save_path_model = mPath
    if not os.path.isdir(args.save_path_model):
        os.makedirs(args.save_path_model)

    log = open(os.path.join(mPath, 'seed_{}.txt'.format(args.manualSeed)), 'w')
    print_log('save path : {}'.format(args.save_path), log)
    state = {k: v for k, v in args._get_kwargs()}
    print_log(state, log)
    print("Random Seed: {}".format(args.manualSeed))
    print("python version : {}".format(sys.version.replace('\n', ' ')))
    print("torch  version : {}".format(torch.__version__))
    print("cudnn  version : {}".format(torch.backends.cudnn.version()))

    # Init dataset
    if not os.path.isdir(args.data_path):
        os.makedirs(args.data_path)
    normalize_img = torchvision.transforms.Normalize(
        mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

    train_trans = transforms.Compose([
        transforms.RandomSizedCrop(args.img_scale),
        PowerPIL(),
        transforms.ToTensor(),
        # normalize_img,
        RandomErasing()
    ])

    ## Normalization only for validation and test
    valid_trans = transforms.Compose([
        transforms.Scale(256),
        transforms.CenterCrop(args.img_scale),
        transforms.ToTensor(),
        # normalize_img
    ])

    test_trans = valid_trans

    train_data = df.sample(frac=args.validationRatio)
    valid_data = df[~df['file'].isin(train_data['file'])]

    train_set = GenericDataset(train_data,
                               args.data_path,
                               transform=train_trans)
    valid_set = GenericDataset(valid_data,
                               args.data_path,
                               transform=valid_trans)

    t_loader = DataLoader(train_set,
                          batch_size=args.batch_size,
                          shuffle=True,
                          num_workers=0)
    v_loader = DataLoader(valid_set,
                          batch_size=args.batch_size,
                          shuffle=True,
                          num_workers=0)
    # test_loader  = DataLoader(test_set, batch_size=batch_size, shuffle=False, num_workers=4)

    dataset_sizes = {
        'train': len(t_loader.dataset),
        'valid': len(v_loader.dataset)
    }
    print(dataset_sizes)
    # net = torch.nn.DataParallel(net, device_ids=list(range(args.ngpu)))
    criterion = torch.nn.CrossEntropyLoss()

    # optimizer = torch.optim.SGD(net.parameters(), state['learning_rate'], momentum=state['momentum'],
    #               weight_decay=state['decay'], nesterov=True)

    # optimizer = torch.optim.Adam(net.parameters(), lr=args.learning_rate)
    optimizer = torch.optim.SGD(net.parameters(),
                                state['learning_rate'],
                                momentum=state['momentum'],
                                weight_decay=state['decay'],
                                nesterov=True)
    # optimizer = torch.optim.Adam(net.parameters(), lr=state['learning_rate'])

    if args.use_cuda:
        net.cuda()
        criterion.cuda()

    recorder = RecorderMeter(args.epochs)
    # optionally resume from a checkpoint
    if args.evaluate:
        validate(v_loader, net, criterion, log)
        return
    if args.tensorboard:
        configure("./logs/runs/%s" % (exp_name))

    print('    Total params: %.2fM' %
          (sum(p.numel() for p in net.parameters()) / 1000000.0))

    # Main loop
    start_training_time = time.time()
    training_time = time.time()
    start_time = time.time()
    epoch_time = AverageMeter()
    for epoch in tqdm(range(args.start_epoch, args.epochs)):
        current_learning_rate = adjust_learning_rate(optimizer, epoch,
                                                     args.gammas,
                                                     args.schedule)
        need_hour, need_mins, need_secs = convert_secs2time(
            epoch_time.avg * (args.epochs - epoch))
        need_time = '[Need: {:02d}:{:02d}:{:02d}]'.format(
            need_hour, need_mins, need_secs)
        print_log('\n==>>{:s} [Epoch={:03d}/{:03d}] {:s} [learning_rate={:6.4f}]'.format(time_string(), epoch, args.epochs, need_time, current_learning_rate) \
                    + ' [Best : Accuracy={:.2f}, Error={:.2f}]'.format(recorder.max_accuracy(False), 100-recorder.max_accuracy(False)), log)

        tqdm.write(
            '\n==>>Epoch=[{:03d}/{:03d}], {:s}, LR=[{}], Batch=[{}]'.format(
                epoch, args.epochs, time_string(), state['learning_rate'],
                args.batch_size) + ' [Model={}]'.format(
                    (type(net).__name__), ), log)

        # train for one epoch
        train_acc, train_los = train(t_loader, net, criterion, optimizer,
                                     epoch, log)
        val_acc, val_los = validate(v_loader, net, criterion, epoch, log)
        is_best = recorder.update(epoch, train_los, train_acc, val_los,
                                  val_acc)

        # measure elapsed time
        epoch_time.update(time.time() - start_time)
        start_time = time.time()
        training_time = time.time() - start_training_time
        recorder.plot_curve(
            os.path.join(mPath, real_model_name + '_' + exp_name + '.png'),
            training_time, net, real_model_name, dataset_sizes,
            args.batch_size, args.learning_rate, args.dataset, args.manualSeed,
            args.num_classes)

        if float(val_acc) > float(95.0):
            print("*** EARLY STOP ***")
            df_pred = testSeedlingsModel(args.test_data_path, net,
                                         num_to_class, test_trans)
            model_save_path = os.path.join(
                mPath, real_model_name + '_' + str(val_acc) + '_' +
                str(val_los) + "_" + str(epoch))

            df_pred.to_csv(model_save_path + "_sub.csv",
                           columns=('file', 'species'),
                           index=None)
            torch.save(net.state_dict(), model_save_path + '_.pth')

            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    # 'arch': args.arch,
                    'state_dict': net.state_dict(),
                    'recorder': recorder,
                    'optimizer': optimizer.state_dict(),
                },
                is_best,
                mPath,
                str(val_acc) + '_' + str(val_los) + "_" + str(epoch) +
                '_checkpoint.pth.tar')

    log.close()
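The main() functions in Examples #21 and #22 (and the one that follows it) call adjust_learning_rate(optimizer, epoch, gammas, schedule); a sketch of the usual multi-step schedule, assuming the module-level args used throughout these examples holds the base rate in args.learning_rate:

def adjust_learning_rate(optimizer, epoch, gammas, schedule):
    # multiply the base learning rate by each gamma once its milestone epoch has been reached
    lr = args.learning_rate
    for gamma, milestone in zip(gammas, schedule):
        if epoch >= milestone:
            lr = lr * gamma
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
    return lr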
Example #22
def main():
    # Init logger
    if not os.path.isdir(args.save_path):
        os.makedirs(args.save_path)
    log = open(
        os.path.join(args.save_path,
                     'log_seed_{}.txt'.format(args.manualSeed)), 'w')
    print_log('save path : {}'.format(args.save_path), log)
    state = {k: v for k, v in args._get_kwargs()}
    print_log(state, log)
    print_log("Random Seed: {}".format(args.manualSeed), log)
    print_log("python version : {}".format(sys.version.replace('\n', ' ')),
              log)
    print_log("torch  version : {}".format(torch.__version__), log)
    print_log("cudnn  version : {}".format(torch.backends.cudnn.version()),
              log)
    print_log("Compress Rate: {}".format(args.rate), log)
    print_log("Layer Begin: {}".format(args.layer_begin), log)
    print_log("Layer End: {}".format(args.layer_end), log)
    print_log("Layer Inter: {}".format(args.layer_inter), log)
    print_log("Epoch prune: {}".format(args.epoch_prune), log)
    # Init dataset
    if not os.path.isdir(args.data_path):
        os.makedirs(args.data_path)

    if args.dataset == 'cifar10':
        mean = [x / 255 for x in [125.3, 123.0, 113.9]]
        std = [x / 255 for x in [63.0, 62.1, 66.7]]
    elif args.dataset == 'cifar100':
        mean = [x / 255 for x in [129.3, 124.1, 112.4]]
        std = [x / 255 for x in [68.2, 65.4, 70.4]]
    else:
        assert False, "Unknow dataset : {}".format(args.dataset)

    train_transform = transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.RandomCrop(32, padding=4),
        transforms.ToTensor(),
        transforms.Normalize(mean, std)
    ])
    test_transform = transforms.Compose(
        [transforms.ToTensor(),
         transforms.Normalize(mean, std)])

    if args.dataset == 'cifar10':
        train_data = dset.CIFAR10(args.data_path,
                                  train=True,
                                  transform=train_transform,
                                  download=True)
        test_data = dset.CIFAR10(args.data_path,
                                 train=False,
                                 transform=test_transform,
                                 download=True)
        num_classes = 10
    elif args.dataset == 'cifar100':
        train_data = dset.CIFAR100(args.data_path,
                                   train=True,
                                   transform=train_transform,
                                   download=True)
        test_data = dset.CIFAR100(args.data_path,
                                  train=False,
                                  transform=test_transform,
                                  download=True)
        num_classes = 100
    elif args.dataset == 'svhn':
        train_data = dset.SVHN(args.data_path,
                               split='train',
                               transform=train_transform,
                               download=True)
        test_data = dset.SVHN(args.data_path,
                              split='test',
                              transform=test_transform,
                              download=True)
        num_classes = 10
    elif args.dataset == 'stl10':
        train_data = dset.STL10(args.data_path,
                                split='train',
                                transform=train_transform,
                                download=True)
        test_data = dset.STL10(args.data_path,
                               split='test',
                               transform=test_transform,
                               download=True)
        num_classes = 10
    elif args.dataset == 'imagenet':
        assert False, 'Do not finish imagenet code'
    else:
        assert False, 'Do not support dataset : {}'.format(args.dataset)

    train_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=True)
    test_loader = torch.utils.data.DataLoader(test_data,
                                              batch_size=args.batch_size,
                                              shuffle=False,
                                              num_workers=args.workers,
                                              pin_memory=True)

    print_log("=> creating model '{}'".format(args.arch), log)
    # Init model, criterion, and optimizer
    net = models.__dict__[args.arch](num_classes)
    net_ref = models.__dict__[args.arch](num_classes)
    print_log("=> network :\n {}".format(net), log)

    net = torch.nn.DataParallel(net, device_ids=list(range(args.ngpu)))
    net_ref = torch.nn.DataParallel(net_ref, device_ids=list(range(args.ngpu)))

    # define loss function (criterion) and optimizer
    criterion = torch.nn.CrossEntropyLoss()

    optimizer = torch.optim.SGD(net.parameters(),
                                state['learning_rate'],
                                momentum=state['momentum'],
                                weight_decay=state['decay'],
                                nesterov=True)

    if args.use_cuda:
        net.cuda()
        criterion.cuda()

    recorder = RecorderMeter(args.epochs)
    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print_log("=> loading checkpoint '{}'".format(args.resume), log)
            checkpoint = torch.load(args.resume)
            net_ref = checkpoint['state_dict']
            print_log(
                "=> loaded checkpoint '{}' (epoch {})".format(
                    args.resume, checkpoint['epoch']), log)
        else:
            print_log("=> no checkpoint found at '{}'".format(args.resume),
                      log)
    else:
        print_log(
            "=> do not use any checkpoint for {} model".format(args.arch), log)

    ###################################################################################################################
    for m, m_ref in zip(net.modules(), net_ref.modules()):
        if isinstance(m, nn.Conv2d):
            weight_copy = m_ref.weight.data.abs().clone()
            mask = weight_copy.gt(0).float().cuda()
            n = mask.sum() / float(m.in_channels)
            m.weight.data.normal_(0, math.sqrt(2. / n))
            m.weight.data.mul_(mask)
    ###################################################################################################################

    if args.evaluate:
        time1 = time.time()
        validate(test_loader, net, criterion, log)
        time2 = time.time()
        print('function took %0.3f ms' % ((time2 - time1) * 1000.0))
        return

    m = Mask(net)

    m.init_length()

    comp_rate = args.rate
    print("-" * 10 + "one epoch begin" + "-" * 10)
    print("the compression rate now is %f" % comp_rate)

    val_acc_1, val_los_1 = validate(test_loader, net_ref, criterion, log)

    print(" accu before is: %.3f %%" % val_acc_1)

    if args.use_cuda:
        net = net.cuda()
    val_acc_2, val_los_2 = validate(test_loader, net, criterion, log)
    print(" accu after is: %s %%" % val_acc_2)

    # Main loop
    start_time = time.time()
    epoch_time = AverageMeter()
    for epoch in range(args.start_epoch, args.epochs):
        current_learning_rate = adjust_learning_rate(optimizer, epoch,
                                                     args.gammas,
                                                     args.schedule)

        need_hour, need_mins, need_secs = convert_secs2time(
            epoch_time.avg * (args.epochs - epoch))
        need_time = '[Need: {:02d}:{:02d}:{:02d}]'.format(
            need_hour, need_mins, need_secs)

        print_log('\n==>>{:s} [Epoch={:03d}/{:03d}] {:s} [learning_rate={:6.4f}]'.format(time_string(), epoch, args.epochs, need_time, current_learning_rate) \
                                + ' [Best : Accuracy={:.2f}, Error={:.2f}]'.format(recorder.max_accuracy(False), 100-recorder.max_accuracy(False)), log)

        num_parameters = get_conv_zero_param(net)
        print_log('Zero parameters: {}'.format(num_parameters), log)
        num_parameters = sum([param.nelement() for param in net.parameters()])
        print_log('Parameters: {}'.format(num_parameters), log)

        # train for one epoch
        train_acc, train_los = train(train_loader, net, criterion, optimizer,
                                     epoch, log)

        # evaluate on validation set
        val_acc_1, val_los_1 = validate(test_loader, net, criterion, log)

        is_best = recorder.update(epoch, train_los, train_acc, val_los_1,
                                  val_acc_1)

        save_checkpoint(
            {
                'epoch': epoch + 1,
                'arch': args.arch,
                'state_dict': net,
                'recorder': recorder,
                'optimizer': optimizer.state_dict(),
            }, is_best, args.save_path, 'checkpoint.pth.tar')

        # measure elapsed time
        epoch_time.update(time.time() - start_time)
        start_time = time.time()

    log.close()
def main():
  # Init logger
  
  if not os.path.isdir(args.save_path):
    os.makedirs(args.save_path)
  log = open(os.path.join(args.save_path, 'log_seed_{}.txt'.format(args.manualSeed)), 'w')
  print_log('save path : {}'.format(args.save_path), log)
  state = {k: v for k, v in args._get_kwargs()}
  print_log(state, log)
  print_log("Random Seed: {}".format(args.manualSeed), log)
  print_log("python version : {}".format(sys.version.replace('\n', ' ')), log)
  print_log("torch  version : {}".format(torch.__version__), log)
  print_log("cudnn  version : {}".format(torch.backends.cudnn.version()), log)

  # Init dataset
  
  if not os.path.exists(args.data_path):
    os.makedirs(args.data_path)

  if args.dataset == 'cifar10':
    mean = [x / 255 for x in [125.3, 123.0, 113.9]]
    std = [x / 255 for x in [63.0, 62.1, 66.7]]
  elif args.dataset == 'cifar100':
    mean = [x / 255 for x in [129.3, 124.1, 112.4]]
    std = [x / 255 for x in [68.2, 65.4, 70.4]]
  else:
    assert False, "Unknow dataset : {}".format(args.dataset)

  train_transform = transforms.Compose(
    [transforms.RandomHorizontalFlip(), transforms.RandomCrop(32, padding=4), transforms.ToTensor(),
     transforms.Normalize(mean, std)])
  test_transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize(mean, std)])

  if args.dataset == 'cifar10':
    train_data = dset.CIFAR10(args.data_path, train=True, transform=train_transform, download=True)
    test_data = dset.CIFAR10(args.data_path, train=False, transform=test_transform, download=True)
    num_classes = 10
  elif args.dataset == 'cifar100':
    train_data = dset.CIFAR100(args.data_path, train=True, transform=train_transform, download=True)
    test_data = dset.CIFAR100(args.data_path, train=False, transform=test_transform, download=True)
    num_classes = 100
  elif args.dataset == 'svhn':
    train_data = dset.SVHN(args.data_path, split='train', transform=train_transform, download=True)
    test_data = dset.SVHN(args.data_path, split='test', transform=test_transform, download=True)
    num_classes = 10
  elif args.dataset == 'stl10':
    train_data = dset.STL10(args.data_path, split='train', transform=train_transform, download=True)
    test_data = dset.STL10(args.data_path, split='test', transform=test_transform, download=True)
    num_classes = 10
  elif args.dataset == 'imagenet':
    assert False, 'Do not finish imagenet code'
  else:
    assert False, 'Do not support dataset : {}'.format(args.dataset)

  train_loader = torch.utils.data.DataLoader(train_data, batch_size=args.batch_size, shuffle=True,
                         num_workers=args.workers, pin_memory=True)
  test_loader = torch.utils.data.DataLoader(test_data, batch_size=args.batch_size, shuffle=False,
                        num_workers=args.workers, pin_memory=True)

  print_log("=> creating model '{}'".format(args.arch), log)
  # Init model, criterion, and optimizer
  net = models.__dict__[args.arch](num_classes)
  print_log("=> network :\n {}".format(net), log)

  net = torch.nn.DataParallel(net, device_ids=list(range(args.ngpu)))

  # define loss function (criterion) and optimizer
  criterion = torch.nn.CrossEntropyLoss()
  optimizer = torch.optim.SGD(net.parameters(), state['learning_rate'], momentum=state['momentum'],
                weight_decay=state['decay'], nesterov=False)

  if args.use_cuda:
    net.cuda()
    criterion.cuda()

  recorder = RecorderMeter(args.epochs)
  # optionally resume from a checkpoint
  if args.resume:
    if os.path.isfile(args.resume):
      print_log("=> loading checkpoint '{}'".format(args.resume), log)
      checkpoint = torch.load(args.resume)
      recorder = checkpoint['recorder']
      args.start_epoch = checkpoint['epoch']
      net.load_state_dict(checkpoint['state_dict'])
      optimizer.load_state_dict(checkpoint['optimizer'])
      print_log("=> loaded checkpoint '{}' (epoch {})" .format(args.resume, checkpoint['epoch']), log)
    else:
      raise ValueError("=> no checkpoint found at '{}'".format(args.resume))
  else:
    print_log("=> do not use any checkpoint for {} model".format(args.arch), log)

  if args.evaluate:
    validate(test_loader, net, criterion, log)
    return

  # Main loop
  start_time = time.time()
  epoch_time = AverageMeter()
  for epoch in range(args.start_epoch, args.epochs):
    current_learning_rate = adjust_learning_rate(optimizer, epoch, args.gammas, args.schedule)

    need_hour, need_mins, need_secs = convert_secs2time(epoch_time.avg * (args.epochs-epoch))
    need_time = '[Need: {:02d}:{:02d}:{:02d}]'.format(need_hour, need_mins, need_secs)

    print_log('\n==>>{:s} [Epoch={:03d}/{:03d}] {:s} [learning_rate={:6.4f}]'.format(time_string(), epoch, args.epochs, need_time, current_learning_rate) \
                + ' [Best : Accuracy={:.2f}, Error={:.2f}]'.format(recorder.max_accuracy(False), 100-recorder.max_accuracy(False)), log)

    # train for one epoch
    train_acc, train_los = train(train_loader, net, criterion, optimizer, epoch, log)

    # evaluate on validation set
    #val_acc,   val_los   = extract_features(test_loader, net, criterion, log)
    val_acc,   val_los   = validate(test_loader, net, criterion, log)
    is_best = recorder.update(epoch, train_los, train_acc, val_los, val_acc)

    save_checkpoint({
      'epoch': epoch + 1,
      'arch': args.arch,
      'state_dict': net.state_dict(),
      'recorder': recorder,
      'optimizer' : optimizer.state_dict(),
      'args'      : copy.deepcopy(args),
    }, is_best, args.save_path, 'hb16_10check.pth.tar')

    # measure elapsed time
    epoch_time.update(time.time() - start_time)
    start_time = time.time()
    recorder.plot_curve( os.path.join(args.save_path, 'hb16_10.png') )

  log.close()
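
The training loops in these examples share a handful of small utilities (AverageMeter, RecorderMeter, print_log, convert_secs2time, ...) whose definitions live elsewhere in each project. As a point of reference, below is a minimal AverageMeter sketch in the usual style; the exact implementations in the individual repositories may differ.

class AverageMeter(object):
    """Track the latest value, running sum, count and average of a scalar
    (batch time, loss, top-1 accuracy, ...)."""

    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0.0
        self.sum = 0.0
        self.count = 0
        self.avg = 0.0

    def update(self, val, n=1):
        # 'val' is the latest measurement and 'n' the number of samples it covers
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

# typical usage inside an epoch loop:
#   batch_time = AverageMeter(); batch_time.update(time.time() - end)
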
Example #24
0
def evaluate(args):
    if not args.cpu:
        assert torch.cuda.is_available(), 'CUDA is not available.'
        torch.backends.cudnn.enabled = True
        torch.backends.cudnn.benchmark = True

    print('The image is {:}'.format(args.image))
    print('The model is {:}'.format(args.model))
    snapshot = Path(args.model)
    assert snapshot.exists(), 'The model path {:} does not exist'.format(snapshot)
    if args.cpu: snapshot = torch.load(snapshot, map_location='cpu')
    else: snapshot = torch.load(snapshot)

    mean_fill = tuple([int(x * 255) for x in [0.5, 0.5, 0.5]])
    normalize = transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
    param = snapshot['args']
    eval_transform = transforms.Compose([
        transforms.PreCrop(param.pre_crop_expand),
        transforms.TrainScale2WH((param.crop_width, param.crop_height)),
        transforms.ToTensor(), normalize
    ])

    net = models.__dict__[param.arch](param.modelconfig, None)

    if not args.cpu: net = net.cuda()
    weights = models.remove_module_dict(snapshot['state_dict'])
    net.load_state_dict(weights)

    dataset = datasets.GeneralDataset(eval_transform, param.sigma,
                                      param.downsample, param.heatmap_type,
                                      param.dataset_name)
    dataset.reset(param.num_pts)

    print('[{:}] prepare the input data'.format(time_string()))

    print("Using MT-CNN face detector.")
    try:
        face = utils.detect_face_mtcnn(args.image)
    except utils.mtcnn_detector.BBoxNotFound:
        print("MT-CNN detector failed! Using default bbox instead.")
        face = [153.08, 462., 607.78, 1040.42]

    [image, _, _, _, _, _,
     cropped_size], meta = dataset.prepare_input(args.image, face)
    print('[{:}] prepare the input data done'.format(time_string()))
    print('Net : \n{:}'.format(net))
    # network forward
    with torch.no_grad():
        if args.cpu: inputs = image.unsqueeze(0)
        else: inputs = image.unsqueeze(0).cuda()
        gan_output = (net.netG_A(inputs) + net.netG_B(inputs)) / 2
        gan_output = (gan_output * 0.5 + 0.5).squeeze(0).cpu().permute(
            1, 2, 0).numpy()
        Image.fromarray((gan_output * 255).astype(np.uint8)).save(
            args.save_path.replace(".jpg", ".gan.jpg"))
        batch_heatmaps, batch_locs, batch_scos, _ = net(inputs)
        #print ('input-shape : {:}'.format(inputs.shape))
        flops, params = get_model_infos(net, inputs.shape, None)
        print('\nIN-shape : {:}, FLOPs : {:} MB, Params : {:}.'.format(
            list(inputs.shape), flops, params))
        flops, params = get_model_infos(net, None, inputs)
        print('\nIN-shape : {:}, FLOPs : {:} MB, Params : {:}.'.format(
            list(inputs.shape), flops, params))
    print('[{:}] the network forward done'.format(time_string()))

    # obtain the locations on the image in the original size
    cpu = torch.device('cpu')
    np_batch_locs = batch_locs.to(cpu).numpy()
    np_batch_scos = batch_scos.to(cpu).numpy()
    cropped_size = cropped_size.numpy()
    locations = np_batch_locs[0, :-1, :]
    scores = np.expand_dims(np_batch_scos[0, :-1], -1)

    scale_h = cropped_size[0] * 1. / inputs.size(-2)
    scale_w = cropped_size[1] * 1. / inputs.size(-1)

    locations[:, 0] = locations[:, 0] * scale_w + cropped_size[2]
    locations[:, 1] = locations[:, 1] * scale_h + cropped_size[3]
    prediction = np.concatenate((locations, scores), axis=1).transpose(1, 0)
    for i in range(param.num_pts):
        point = prediction[:, i]
        print(
            'The coordinate of {:02d}/{:02d}-th points : ({:.1f}, {:.1f}), score = {:.3f}'
            .format(i, param.num_pts, float(point[0]), float(point[1]),
                    float(point[2])))

    if args.save_path:
        image = draw_image_by_points(args.image, prediction, 1, (255, 0, 0),
                                     False, False)
        image.save(args.save_path)
        print('save image with landmarks into {:}'.format(args.save_path))
    print('finish san evaluation on a single image : {:}'.format(args.image))
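
The post-processing step above maps landmark coordinates predicted on the resized network input back to the original image: scale by crop size / input size and shift by the crop offset stored in the last two entries of cropped_size. Below is a standalone sketch of that mapping; the function name and the toy numbers are illustrative, not taken from the project.

import numpy as np

def map_to_original(locations, cropped_size, input_h, input_w):
    """locations: (num_pts, 2) array of (x, y) predicted on the network input.
    cropped_size: [crop_h, crop_w, offset_x, offset_y] as produced by prepare_input."""
    scale_h = cropped_size[0] * 1. / input_h   # crop height / network input height
    scale_w = cropped_size[1] * 1. / input_w   # crop width  / network input width
    mapped = locations.copy()
    mapped[:, 0] = locations[:, 0] * scale_w + cropped_size[2]  # x back to the original image
    mapped[:, 1] = locations[:, 1] * scale_h + cropped_size[3]  # y back to the original image
    return mapped

# two points predicted on a 256x256 input, for a 320x300 crop taken at offset (40, 60)
points = np.array([[128.0, 128.0], [10.0, 200.0]])
print(map_to_original(points, [320, 300, 40, 60], input_h=256, input_w=256))
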
Example #25
0
def main_worker(gpu, ngpus_per_node, args):
    global best_acc
    args.gpu = gpu
    assert args.gpu is not None
    print("Use GPU: {} for training".format(args.gpu))

    log = open(
        os.path.join(
            args.save_path,
            'log_seed{}{}.txt'.format(args.manualSeed,
                                      '_eval' if args.evaluate else '')), 'w')
    log = (log, args.gpu)

    net = models.__dict__[args.arch](pretrained=True)
    disable_dropout(net)
    net = to_bayesian(net, args.psi_init_range)
    net.apply(unfreeze)

    print_log("Python version : {}".format(sys.version.replace('\n', ' ')),
              log)
    print_log("PyTorch  version : {}".format(torch.__version__), log)
    print_log("CuDNN  version : {}".format(torch.backends.cudnn.version()),
              log)
    print_log(
        "Number of parameters: {}".format(
            sum([p.numel() for p in net.parameters()])), log)
    print_log(str(args), log)

    if args.distributed:
        if args.multiprocessing_distributed:
            args.rank = args.rank * ngpus_per_node + gpu
        dist.init_process_group(backend=args.dist_backend,
                                init_method=args.dist_url + ":" +
                                args.dist_port,
                                world_size=args.world_size,
                                rank=args.rank)
        torch.cuda.set_device(args.gpu)
        net.cuda(args.gpu)
        args.batch_size = int(args.batch_size / ngpus_per_node)
        net = torch.nn.parallel.DistributedDataParallel(net,
                                                        device_ids=[args.gpu])
    else:
        torch.cuda.set_device(args.gpu)
        net = net.cuda(args.gpu)

    criterion = torch.nn.CrossEntropyLoss().cuda(args.gpu)

    mus, psis = [], []
    for name, param in net.named_parameters():
        if 'psi' in name: psis.append(param)
        else: mus.append(param)
    mu_optimizer = SGD(mus,
                       args.learning_rate,
                       args.momentum,
                       weight_decay=args.decay,
                       nesterov=(args.momentum > 0.0))

    psi_optimizer = PsiSGD(psis,
                           args.learning_rate,
                           args.momentum,
                           weight_decay=args.decay,
                           nesterov=(args.momentum > 0.0))

    recorder = RecorderMeter(args.epochs)
    if args.resume:
        if args.resume == 'auto':
            args.resume = os.path.join(args.save_path, 'checkpoint.pth.tar')
        if os.path.isfile(args.resume):
            print_log("=> loading checkpoint '{}'".format(args.resume), log)
            checkpoint = torch.load(args.resume,
                                    map_location='cuda:{}'.format(args.gpu))
            recorder = checkpoint['recorder']
            recorder.refresh(args.epochs)
            args.start_epoch = checkpoint['epoch']
            net.load_state_dict(
                checkpoint['state_dict'] if args.distributed else {
                    k.replace('module.', ''): v
                    for k, v in checkpoint['state_dict'].items()
                })
            mu_optimizer.load_state_dict(checkpoint['mu_optimizer'])
            psi_optimizer.load_state_dict(checkpoint['psi_optimizer'])
            best_acc = recorder.max_accuracy(False)
            print_log(
                "=> loaded checkpoint '{}' accuracy={} (epoch {})".format(
                    args.resume, best_acc, checkpoint['epoch']), log)
        else:
            print_log("=> no checkpoint found at '{}'".format(args.resume),
                      log)
    else:
        print_log("=> do not use any checkpoint for the model", log)

    cudnn.benchmark = True

    train_loader, ood_train_loader, test_loader, adv_loader, \
        fake_loader, adv_loader2 = load_dataset_ft(args)
    psi_optimizer.num_data = len(train_loader.dataset)

    if args.evaluate:
        evaluate(test_loader, adv_loader, fake_loader, adv_loader2, net,
                 criterion, args, log, 20, 100)
        return

    start_time = time.time()
    epoch_time = AverageMeter()
    train_los = -1

    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            train_loader.sampler.set_epoch(epoch)
            ood_train_loader.sampler.set_epoch(epoch)
        cur_lr, cur_slr = adjust_learning_rate(mu_optimizer, psi_optimizer,
                                               epoch, args)

        need_hour, need_mins, need_secs = convert_secs2time(
            epoch_time.avg * (args.epochs - epoch))
        need_time = '[Need: {:02d}:{:02d}:{:02d}]'.format(
            need_hour, need_mins, need_secs)

        print_log('\n==>>{:s} [Epoch={:03d}/{:03d}] {:s} [learning_rate={:6.4f} {:6.4f}]'.format(
                                    time_string(), epoch, args.epochs, need_time, cur_lr, cur_slr) \
                    + ' [Best : Accuracy={:.2f}, Error={:.2f}]'.format(recorder.max_accuracy(False), 100-recorder.max_accuracy(False)), log)

        train_acc, train_los = train(train_loader, ood_train_loader, net,
                                     criterion, mu_optimizer, psi_optimizer,
                                     epoch, args, log)
        val_acc, val_los = 0, 0
        recorder.update(epoch, train_los, train_acc, val_los, val_acc)

        is_best = False
        if val_acc > best_acc:
            is_best = True
            best_acc = val_acc

        if args.gpu == 0:
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'state_dict': net.state_dict(),
                    'recorder': recorder,
                    'mu_optimizer': mu_optimizer.state_dict(),
                    'psi_optimizer': psi_optimizer.state_dict(),
                }, False, args.save_path, 'checkpoint.pth.tar')

        epoch_time.update(time.time() - start_time)
        start_time = time.time()
        recorder.plot_curve(os.path.join(args.save_path, 'log.png'))

    evaluate(test_loader, adv_loader, fake_loader, adv_loader2, net, criterion,
             args, log, 20, 100)

    log[0].close()
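
A main_worker of this shape is normally spawned once per GPU with torch.multiprocessing.spawn. The snippet below is a minimal, self-contained sketch of that launch pattern using a stand-in worker; the real launcher builds args with argparse and passes main_worker instead.

import torch
import torch.multiprocessing as mp

def demo_worker(gpu, ngpus_per_node, message):
    # stand-in for main_worker: each spawned process receives its GPU index first
    print('process for GPU {:d}/{:d}: {:}'.format(gpu, ngpus_per_node, message))

if __name__ == '__main__':
    ngpus_per_node = max(torch.cuda.device_count(), 1)
    # one process per GPU; the extra positional arguments are forwarded via `args`
    mp.spawn(demo_worker, nprocs=ngpus_per_node, args=(ngpus_per_node, 'hello'))
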
Example #26
0
    parser.add_argument('--p_epochs', type=int, default=50)
    parser.add_argument('--p_weight_decay', type=float, default=5e-4)
    parser.add_argument('--new_pop_limit', type=int, default=8)
    parser.add_argument('--init_pool_size', type=int, default=32)
    parser.add_argument('--max_samples', type=int, default=100)
    parser.add_argument('--step_size', type=float, default=1.)
    parser.add_argument('--step_batch_size', type=int, default=128)
    # parser.add_argument('--eval_batches', type=int, default=10)

    parser.add_argument('--load_workers',
                        type=int,
                        default=0,
                        help='number of data loading workers')
    parser.add_argument('--log_dir',
                        type=str,
                        default='logs/searches-ws/%s' % time_string(),
                        help='Folder to save checkpoints and log.')
    parser.add_argument('--nas_bench_path',
                        default=None,
                        type=str,
                        help='The path to load NAS-Bench-201.')
    parser.add_argument('--print_freq',
                        type=int,
                        default=200,
                        help='print frequency (default: 200)')
    parser.add_argument('--seed', type=int, default=114514, help='manual seed')
    parser.add_argument('--repeat', type=int, default=1)
    parser.add_argument('--workers', type=int, default=1)
    parser.add_argument('--load_checkpoint', type=str, default=None)
    parser.add_argument('--tag', type=str, default=None)
    args = parser.parse_args()
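
time_string, print_log and convert_secs2time are small logging utilities shared by most of these scripts. The sketch below shows plausible minimal implementations; the exact versions (for example, the tuple-valued log used in the distributed example above) differ per project.

import time

def time_string():
    # human-readable timestamp, used in log lines and default log_dir names
    return time.strftime('%Y-%m-%d %X', time.localtime(time.time()))

def convert_secs2time(epoch_time):
    # split a duration in seconds into (hours, minutes, seconds)
    need_hour = int(epoch_time / 3600)
    need_mins = int((epoch_time - need_hour * 3600) / 60)
    need_secs = int(epoch_time - need_hour * 3600 - need_mins * 60)
    return need_hour, need_mins, need_secs

def print_log(print_string, log):
    # print to stdout and append the same line to an opened log file
    print('{:}'.format(print_string))
    log.write('{:}\n'.format(print_string))
    log.flush()
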
Example #27
0
def main():
    if not os.path.isdir(args.save_path):
        os.makedirs(args.save_path)
    log = open(
        os.path.join(args.save_path, "log_seed_{}.txt".format(args.manualSeed)), "w"
    )
    print_log("save path : {}".format(args.save_path), log)
    state = {k: v for k, v in args._get_kwargs()}
    print_log(state, log)
    print_log("Random Seed: {}".format(args.manualSeed), log)
    print_log("python version : {}".format(sys.version.replace("\n", " ")), log)
    print_log("torch  version : {}".format(torch.__version__), log)
    print_log("cudnn  version : {}".format(torch.backends.cudnn.version()), log)

    # Init dataset
    if not os.path.isdir(args.data_path):
        os.makedirs(args.data_path)

    if args.dataset == "cifar10":
        mean = [x / 255 for x in [125.3, 123.0, 113.9]]
        std = [x / 255 for x in [63.0, 62.1, 66.7]]
    elif args.dataset == "cifar100":
        mean = [x / 255 for x in [129.3, 124.1, 112.4]]
        std = [x / 255 for x in [68.2, 65.4, 70.4]]
    else:
        assert False, "Unknow dataset : {}".format(args.dataset)

    train_transform = transforms.Compose(
        [
            transforms.RandomHorizontalFlip(),
            transforms.RandomCrop(32, padding=4),
            transforms.ToTensor(),
            transforms.Normalize(mean, std),
        ]
    )
    test_transform = transforms.Compose(
        [transforms.ToTensor(), transforms.Normalize(mean, std)]
    )

    if args.dataset == "cifar10":
        train_data = dset.CIFAR10(
            args.data_path, train=True, transform=train_transform, download=True
        )
        test_data = dset.CIFAR10(
            args.data_path, train=False, transform=test_transform, download=True
        )
        num_classes = 10
    elif args.dataset == "cifar100":
        train_data = dset.CIFAR100(
            args.data_path, train=True, transform=train_transform, download=True
        )
        test_data = dset.CIFAR100(
            args.data_path, train=False, transform=test_transform, download=True
        )
        num_classes = 100
    elif args.dataset == "svhn":
        train_data = dset.SVHN(
            args.data_path, split="train", transform=train_transform, download=True
        )
        test_data = dset.SVHN(
            args.data_path, split="test", transform=test_transform, download=True
        )
        num_classes = 10
    elif args.dataset == "stl10":
        train_data = dset.STL10(
            args.data_path, split="train", transform=train_transform, download=True
        )
        test_data = dset.STL10(
            args.data_path, split="test", transform=test_transform, download=True
        )
        num_classes = 10
    elif args.dataset == "imagenet":
        assert False, "Do not finish imagenet code"
    else:
        assert False, "Do not support dataset : {}".format(args.dataset)

    train_loader = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.workers,
        pin_memory=True,
    )
    test_loader = torch.utils.data.DataLoader(
        test_data,
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=True,
    )

    # Init model, criterion, and optimizer
    # net = models.__dict__[args.arch](num_classes).cuda()
    net = SENet34()

    # define loss function (criterion) and optimizer
    criterion = F.nll_loss
    optimizer = torch.optim.SGD(
        net.parameters(),
        state["learning_rate"],
        momentum=state["momentum"],
        weight_decay=state["decay"],
        nesterov=True,
    )

    if args.use_cuda:
        net.cuda()

    recorder = RecorderMeter(args.epochs)
    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print_log("=> loading checkpoint '{}'".format(args.resume), log)
            checkpoint = torch.load(args.resume)
            recorder = checkpoint["recorder"]
            args.start_epoch = checkpoint["epoch"]
            net.load_state_dict(checkpoint["state_dict"])
            optimizer.load_state_dict(checkpoint["optimizer"])
            print_log(
                "=> loaded checkpoint '{}' (epoch {})".format(
                    args.resume, checkpoint["epoch"]
                ),
                log,
            )
        else:
            print_log("=> no checkpoint found at '{}'".format(args.resume), log)
    else:
        print_log("=> do not use any checkpoint for model", log)

    if args.evaluate:
        validate(test_loader, net, criterion, log)
        return

    # Main loop
    start_time = time.time()
    epoch_time = AverageMeter()
    for epoch in range(args.start_epoch, args.epochs):
        current_learning_rate = adjust_learning_rate(
            optimizer, epoch, args.gammas, args.schedule
        )

        need_hour, need_mins, need_secs = convert_secs2time(
            epoch_time.avg * (args.epochs - epoch)
        )
        need_time = "[Need: {:02d}:{:02d}:{:02d}]".format(
            need_hour, need_mins, need_secs
        )

        print_log(
            "\n==>>{:s} [Epoch={:03d}/{:03d}] {:s} [learning_rate={:6.4f}]".format(
                time_string(), epoch, args.epochs, need_time, current_learning_rate
            )
            + " [Best : Accuracy={:.2f}, Error={:.2f}]".format(
                recorder.max_accuracy(False), 100 - recorder.max_accuracy(False)
            ),
            log,
        )

        # train for one epoch
        train_acc, train_los = train(
            train_loader, net, criterion, optimizer, epoch, log
        )

        # evaluate on validation set
        val_acc, val_los = validate(test_loader, net, criterion, log)
        is_best = recorder.update(epoch, train_los, train_acc, val_los, val_acc)

        save_checkpoint(
            {
                "epoch": epoch + 1,
                "state_dict": net.state_dict(),
                "recorder": recorder,
                "optimizer": optimizer.state_dict(),
            },
            is_best,
            args.save_path,
            "checkpoint.pth.tar",
        )

        # measure elapsed time
        epoch_time.update(time.time() - start_time)
        start_time = time.time()
        recorder.plot_curve(os.path.join(args.save_path, "curve.png"))

    log.close()
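
adjust_learning_rate in these scripts implements a step schedule driven by args.gammas and args.schedule. Below is a hedged sketch of the usual implementation; the real helper reads the base rate from the parsed arguments rather than from a keyword default.

def adjust_learning_rate(optimizer, epoch, gammas, schedule, base_lr=0.1):
    """Multiply base_lr by every gamma whose schedule milestone has been reached,
    write the result into all parameter groups, and return it."""
    assert len(gammas) == len(schedule), 'gammas and schedule must have the same length'
    lr = base_lr
    for (gamma, step) in zip(gammas, schedule):
        if epoch >= step:
            lr = lr * gamma
        else:
            break
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
    return lr
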
Example #28
0
def main():
    # Init logger
    if not os.path.isdir(args.save_path):
        os.makedirs(args.save_path)
    log = open(
        os.path.join(args.save_path,
                     'log_seed_{}.txt'.format(args.manualSeed)), 'w')
    print_log('save path : {}'.format(args.save_path), log)
    state = {k: v for k, v in args._get_kwargs()}
    print_log(state, log)
    print_log("Random Seed: {}".format(args.manualSeed), log)
    print_log("python version : {}".format(sys.version.replace('\n', ' ')),
              log)
    print_log("torch  version : {}".format(torch.__version__), log)
    print_log("cudnn  version : {}".format(torch.backends.cudnn.version()),
              log)

    # Init the tensorboard path and writer
    tb_path = os.path.join(args.save_path, 'tb_log',
                           'run_' + str(args.manualSeed))
    # logger = Logger(tb_path)
    writer = SummaryWriter(tb_path)

    # Init dataset
    if not os.path.isdir(args.data_path):
        os.makedirs(args.data_path)

    if args.dataset == 'cifar10':
        mean = [x / 255 for x in [125.3, 123.0, 113.9]]
        std = [x / 255 for x in [63.0, 62.1, 66.7]]
    elif args.dataset == 'cifar100':
        mean = [x / 255 for x in [129.3, 124.1, 112.4]]
        std = [x / 255 for x in [68.2, 65.4, 70.4]]
    elif args.dataset == 'svhn':
        mean = [0.5, 0.5, 0.5]
        std = [0.5, 0.5, 0.5]
    elif args.dataset == 'mnist':
        mean = [0.5, 0.5, 0.5]
        std = [0.5, 0.5, 0.5]
    elif args.dataset == 'imagenet':
        mean = [0.485, 0.456, 0.406]
        std = [0.229, 0.224, 0.225]
    else:
        assert False, "Unknow dataset : {}".format(args.dataset)

    if args.dataset == 'imagenet':
        train_transform = transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize(mean, std)
        ])
        test_transform = transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize(mean, std)
        ])  # here is actually the validation dataset
    else:
        train_transform = transforms.Compose([
            transforms.RandomHorizontalFlip(),
            transforms.RandomCrop(32, padding=4),
            transforms.ToTensor(),
            transforms.Normalize(mean, std)
        ])
        test_transform = transforms.Compose(
            [transforms.ToTensor(),
             transforms.Normalize(mean, std)])

    if args.dataset == 'mnist':
        train_data = dset.MNIST(args.data_path,
                                train=True,
                                transform=train_transform,
                                download=True)
        test_data = dset.MNIST(args.data_path,
                               train=False,
                               transform=test_transform,
                               download=True)
        num_classes = 10
    elif args.dataset == 'cifar10':
        train_data = dset.CIFAR10(args.data_path,
                                  train=True,
                                  transform=train_transform,
                                  download=True)
        test_data = dset.CIFAR10(args.data_path,
                                 train=False,
                                 transform=test_transform,
                                 download=True)
        num_classes = 10
    elif args.dataset == 'cifar100':
        train_data = dset.CIFAR100(args.data_path,
                                   train=True,
                                   transform=train_transform,
                                   download=True)
        test_data = dset.CIFAR100(args.data_path,
                                  train=False,
                                  transform=test_transform,
                                  download=True)
        num_classes = 100
    elif args.dataset == 'svhn':
        train_data = dset.SVHN(args.data_path,
                               split='train',
                               transform=train_transform,
                               download=True)
        test_data = dset.SVHN(args.data_path,
                              split='test',
                              transform=test_transform,
                              download=True)
        num_classes = 10
    elif args.dataset == 'stl10':
        train_data = dset.STL10(args.data_path,
                                split='train',
                                transform=train_transform,
                                download=True)
        test_data = dset.STL10(args.data_path,
                               split='test',
                               transform=test_transform,
                               download=True)
        num_classes = 10
    elif args.dataset == 'imagenet':
        train_dir = os.path.join(args.data_path, 'train')
        test_dir = os.path.join(args.data_path, 'val')
        train_data = dset.ImageFolder(train_dir, transform=train_transform)
        test_data = dset.ImageFolder(test_dir, transform=test_transform)
        num_classes = 1000
    else:
        assert False, 'Unsupported dataset : {}'.format(args.dataset)

    train_loader = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.attack_sample_size,
        shuffle=True,
        num_workers=args.workers,
        pin_memory=True)
    test_loader = torch.utils.data.DataLoader(test_data,
                                              batch_size=args.test_batch_size,
                                              shuffle=True,
                                              num_workers=args.workers,
                                              pin_memory=True)

    print_log("=> creating model '{}'".format(args.arch), log)

    # Init model, criterion, and optimizer
    net = models.__dict__[args.arch](num_classes)
    print_log("=> network :\n {}".format(net), log)

    if args.use_cuda:
        if args.ngpu > 1:
            net = torch.nn.DataParallel(net, device_ids=list(range(args.ngpu)))

    # define loss function (criterion) and optimizer
    criterion = torch.nn.CrossEntropyLoss()

    # separate the parameters so that the two groups can be updated by different optimizers
    all_param = [
        param for name, param in net.named_parameters()
        if not 'step_size' in name
    ]

    step_param = [
        param for name, param in net.named_parameters() if 'step_size' in name
    ]

    if args.optimizer == "SGD":
        print("using SGD as optimizer")
        optimizer = torch.optim.SGD(all_param,
                                    lr=state['learning_rate'],
                                    momentum=state['momentum'],
                                    weight_decay=state['decay'],
                                    nesterov=True)

    elif args.optimizer == "Adam":
        print("using Adam as optimizer")
        optimizer = torch.optim.Adam(filter(lambda param: param.requires_grad,
                                            net.parameters()),
                                     lr=state['learning_rate'],
                                     weight_decay=state['decay'])

    elif args.optimizer == "RMSprop":
        print("using RMSprop as optimizer")
        optimizer = torch.optim.RMSprop(filter(
            lambda param: param.requires_grad, net.parameters()),
                                        lr=state['learning_rate'],
                                        alpha=0.99,
                                        eps=1e-08,
                                        weight_decay=0,
                                        momentum=0)

    if args.use_cuda:
        net.cuda()
        criterion.cuda()

    recorder = RecorderMeter(args.epochs)  # count number of epochs

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print_log("=> loading checkpoint '{}'".format(args.resume), log)
            checkpoint = torch.load(args.resume)
            if not (args.fine_tune):
                args.start_epoch = checkpoint['epoch']
                recorder = checkpoint['recorder']
                optimizer.load_state_dict(checkpoint['optimizer'])

            state_tmp = net.state_dict()
            if 'state_dict' in checkpoint.keys():
                state_tmp.update(checkpoint['state_dict'])
            else:
                state_tmp.update(checkpoint)

            net.load_state_dict(state_tmp)

            print_log(
                "=> loaded checkpoint '{}' (epoch {})".format(
                    args.resume, args.start_epoch), log)
        else:
            print_log("=> no checkpoint found at '{}'".format(args.resume),
                      log)
    else:
        print_log(
            "=> do not use any checkpoint for {} model".format(args.arch), log)

    # update the step_size once the model is loaded. This is used for quantization.
    for m in net.modules():
        if isinstance(m, quan_Conv2d) or isinstance(m, quan_Linear):
            # simple step size update based on the pretrained model or weight init
            m.__reset_stepsize__()

    # block for quantizer optimization
    if args.optimize_step:
        optimizer_quan = torch.optim.SGD(step_param,
                                         lr=0.01,
                                         momentum=0.9,
                                         weight_decay=0,
                                         nesterov=True)

        for m in net.modules():
            if isinstance(m, quan_Conv2d) or isinstance(m, quan_Linear):
                for i in range(300):  # run 300 iterations to reduce the quantization error
                    optimizer_quan.zero_grad()
                    weight_quan = quantize(m.weight, m.step_size,
                                           m.half_lvls) * m.step_size
                    loss_quan = F.mse_loss(weight_quan,
                                           m.weight,
                                           reduction='mean')
                    loss_quan.backward()
                    optimizer_quan.step()

        for m in net.modules():
            if isinstance(m, quan_Conv2d):
                print(m.step_size.data.item(),
                      (m.step_size.detach() * m.half_lvls).item(),
                      m.weight.max().item())

    # block for weight reset
    if args.reset_weight:
        for m in net.modules():
            if isinstance(m, quan_Conv2d) or isinstance(m, quan_Linear):
                m.__reset_weight__()
                # print(m.weight)

    attacker = BFA(criterion, args.k_top)
    net_clean = copy.deepcopy(net)
    # weight_conversion(net)

    if args.enable_bfa:
        perform_attack(attacker, net, net_clean, train_loader, test_loader,
                       args.n_iter, log, writer)
        return

    if args.evaluate:
        validate(test_loader, net, criterion, log)
        return

    # Main loop
    start_time = time.time()
    epoch_time = AverageMeter()

    for epoch in range(args.start_epoch, args.epochs):
        current_learning_rate, current_momentum = adjust_learning_rate(
            optimizer, epoch, args.gammas, args.schedule)
        # Display simulation time
        need_hour, need_mins, need_secs = convert_secs2time(
            epoch_time.avg * (args.epochs - epoch))
        need_time = '[Need: {:02d}:{:02d}:{:02d}]'.format(
            need_hour, need_mins, need_secs)

        print_log(
            '\n==>>{:s} [Epoch={:03d}/{:03d}] {:s} [LR={:6.4f}][M={:1.2f}]'.format(time_string(), epoch, args.epochs,
                                                                                   need_time, current_learning_rate,
                                                                                   current_momentum) \
            + ' [Best : Accuracy={:.2f}, Error={:.2f}]'.format(recorder.max_accuracy(False),
                                                               100 - recorder.max_accuracy(False)), log)

        # train for one epoch
        train_acc, train_los = train(train_loader, net, criterion, optimizer,
                                     epoch, log)

        # evaluate on validation set
        val_acc, _, val_los = validate(test_loader, net, criterion, log)
        recorder.update(epoch, train_los, train_acc, val_los, val_acc)
        is_best = val_acc >= recorder.max_accuracy(False)

        if args.model_only:
            checkpoint_state = {'state_dict': net.state_dict()}
        else:
            checkpoint_state = {
                'epoch': epoch + 1,
                'arch': args.arch,
                'state_dict': net.state_dict(),
                'recorder': recorder,
                'optimizer': optimizer.state_dict(),
            }

        save_checkpoint(checkpoint_state, is_best, args.save_path,
                        'checkpoint.pth.tar', log)

        # measure elapsed time
        epoch_time.update(time.time() - start_time)
        start_time = time.time()
        recorder.plot_curve(os.path.join(args.save_path, 'curve.png'))

        # save addition accuracy log for plotting
        accuracy_logger(base_dir=args.save_path,
                        epoch=epoch,
                        train_accuracy=train_acc,
                        test_accuracy=val_acc)

        # ============ TensorBoard logging ============#

        ## Log the gradient distributions
        for name, param in net.named_parameters():
            name = name.replace('.', '/')
            writer.add_histogram(name + '/grad',
                                 param.grad.clone().cpu().data.numpy(),
                                 epoch + 1,
                                 bins='tensorflow')

        ## Log the weight and bias distributions
        for name, module in net.named_modules():
            name = name.replace('.', '/')
            class_name = str(module.__class__).split('.')[-1].split("'")[0]

            if "Conv2d" in class_name or "Linear" in class_name:
                if module.weight is not None:
                    writer.add_histogram(
                        name + '/weight/',
                        module.weight.clone().cpu().data.numpy(),
                        epoch + 1,
                        bins='tensorflow')

        writer.add_scalar('loss/train_loss', train_los, epoch + 1)
        writer.add_scalar('loss/test_loss', val_los, epoch + 1)
        writer.add_scalar('accuracy/train_accuracy', train_acc, epoch + 1)
        writer.add_scalar('accuracy/test_accuracy', val_acc, epoch + 1)
    # ============ TensorBoard logging ============#

    log.close()
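
The quantizer-calibration block above treats each layer's step_size as a trainable scalar and minimizes the MSE between the quantize-dequantize weights and the full-precision weights. Below is a self-contained sketch of that idea on a single random tensor, using a straight-through rounding estimator; it illustrates the procedure only and is not the project's quan_Conv2d / quantize implementation.

import torch
import torch.nn.functional as F

class _RoundSTE(torch.autograd.Function):
    """Round to the nearest integer; pass the gradient straight through."""
    @staticmethod
    def forward(ctx, x):
        return x.round()

    @staticmethod
    def backward(ctx, grad_output):
        return grad_output

def quantize(weight, step_size, half_lvls):
    # integer quantization levels clipped to [-half_lvls, half_lvls]
    return torch.clamp(_RoundSTE.apply(weight / step_size), -half_lvls, half_lvls)

weight = torch.randn(64, 3, 3, 3)                   # stand-in for one layer's weights
half_lvls = 15                                      # e.g. 5-bit signed quantization
step_size = (weight.abs().max() / half_lvls).clone().detach().requires_grad_(True)
optimizer_quan = torch.optim.SGD([step_size], lr=0.01, momentum=0.9, nesterov=True)

for _ in range(300):                                # same iteration budget as above
    optimizer_quan.zero_grad()
    weight_quan = quantize(weight, step_size, half_lvls) * step_size
    loss_quan = F.mse_loss(weight_quan, weight, reduction='mean')
    loss_quan.backward()
    optimizer_quan.step()

print(step_size.item(), (step_size.detach() * half_lvls).item(), weight.max().item())
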
Example #29
0
def main():
    # Init logger
    if not os.path.isdir(args.save_path):
        os.makedirs(args.save_path)

    # used for file names, etc
    time_stamp = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
    log = open(
        os.path.join(
            args.save_path,
            'log_seed_{0}_{1}.txt'.format(args.manualSeed, time_stamp)), 'w')
    print_log('save path : {}'.format(args.save_path), log)
    state = {k: v for k, v in args._get_kwargs()}
    print_log(state, log)
    print_log("Random Seed: {}".format(args.manualSeed), log)
    print_log("python version : {}".format(sys.version.replace('\n', ' ')),
              log)
    print_log("torch  version : {}".format(torch.__version__), log)
    print_log("cudnn  version : {}".format(torch.backends.cudnn.version()),
              log)

    # Init dataset
    if not os.path.isdir(args.data_path):
        os.makedirs(args.data_path)

    if args.dataset == 'cifar10':
        mean = [x / 255 for x in [125.3, 123.0, 113.9]]
        std = [x / 255 for x in [63.0, 62.1, 66.7]]
    elif args.dataset == 'cifar100':
        mean = [x / 255 for x in [129.3, 124.1, 112.4]]
        std = [x / 255 for x in [68.2, 65.4, 70.4]]
    else:
        assert False, "Unknow dataset : {}".format(args.dataset)

    writer = SummaryWriter()

    #   # Data transforms
    # mean = [0.5071, 0.4867, 0.4408]
    # std = [0.2675, 0.2565, 0.2761]

    train_transform = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean, std)
    ])
    #[transforms.CenterCrop(32), transforms.ToTensor(),
    # transforms.Normalize(mean, std)])
    #)
    test_transform = transforms.Compose([
        transforms.CenterCrop(32),
        transforms.ToTensor(),
        transforms.Normalize(mean, std)
    ])

    if args.dataset == 'cifar10':
        train_data = dset.CIFAR10(args.data_path,
                                  train=True,
                                  transform=train_transform,
                                  download=True)
        test_data = dset.CIFAR10(args.data_path,
                                 train=False,
                                 transform=test_transform,
                                 download=True)
        num_classes = 10
    elif args.dataset == 'cifar100':
        train_data = dset.CIFAR100(args.data_path,
                                   train=True,
                                   transform=train_transform,
                                   download=True)
        test_data = dset.CIFAR100(args.data_path,
                                  train=False,
                                  transform=test_transform,
                                  download=True)
        num_classes = 100
    elif args.dataset == 'imagenet':
        assert False, 'Did not finish imagenet code'
    else:
        assert False, 'Does not support dataset : {}'.format(args.dataset)

    #step_sizes = 2500
    step_sizes = args.alinit
    indices = [l for l in range(0, 50000)]

    annot_indices = []  # indices already added to the training pool (kept for every AL step)
    unannot_indices = [indices]  # indices not yet added to the training pool

    selections = random.sample(range(0, len(unannot_indices[-1])), step_sizes)
    temp = list(np.asarray(unannot_indices[-1])[selections])
    annot_indices.append(temp)

    unannot_indices.append(
        list(set(unannot_indices[-1]) - set(annot_indices[-1])))

    labelled_dset = torch.utils.data.Subset(train_data, annot_indices[-1])
    unlabelled_dset = torch.utils.data.Subset(train_data, unannot_indices[-1])

    #train_loader = torch.utils.data.DataLoader(train_data, batch_size=args.batch_size, shuffle=True,
    #                       num_workers=args.workers, pin_memory=True)
    labelled_loader = torch.utils.data.DataLoader(labelled_dset,
                                                  batch_size=args.batch_size,
                                                  shuffle=True,
                                                  num_workers=args.workers,
                                                  pin_memory=True)

    #unlabelled_loader = torch.utils.data.DataLoader(unlabelled_dset, batch_size=args.batch_size, shuffle=True,
    #num_workers=args.workers, pin_memory=True)

    test_loader = torch.utils.data.DataLoader(test_data,
                                              batch_size=args.batch_size,
                                              shuffle=False,
                                              num_workers=args.workers,
                                              pin_memory=True)

    print_log("=> creating model '{}'".format(args.arch), log)
    # Init model, criterion, and optimizer
    net = models.__dict__[args.arch](num_classes)
    #torch.save(net, 'net.pth')
    #init_net = torch.load('net.pth')
    #net.load_my_state_dict(init_net.state_dict())
    print_log("=> network :\n {}".format(net), log)

    net = torch.nn.DataParallel(net, device_ids=list(range(args.ngpu)))

    # define loss function (criterion) and optimizer
    criterion = torch.nn.CrossEntropyLoss()

    #optimizer = torch.optim.SGD(net.parameters(), lr=0.01, momentum=0.9, weight_decay=0.005, nesterov=False)
    optimizer = torch.optim.Adadelta(
        net.parameters(),
        lr=0.1,
        rho=0.9,
        eps=1e-3,  # momentum=state['momentum'],
        weight_decay=0.001)

    print_log("=> Seed '{}'".format(args.manualSeed), log)
    print_log("=> dataset mean and std '{} - {}'".format(str(mean), str(std)),
              log)

    states_settings = {'optimizer': optimizer.state_dict()}

    print_log("=> optimizer '{}'".format(states_settings), log)
    # 50k,95k,153k,195k,220k
    milestones = [100, 190, 306, 390, 440, 540]
    scheduler = lr_scheduler.MultiStepLR(optimizer, milestones, gamma=0.1)

    if args.use_cuda:
        net.cuda()
        criterion.cuda()

    recorder = RecorderMeter(args.epochs)
    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print_log("=> loading checkpoint '{}'".format(args.resume), log)
            checkpoint = torch.load(args.resume)
            recorder = checkpoint['recorder']
            args.start_epoch = checkpoint['epoch']
            net.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print_log(
                "=> loaded checkpoint '{}' (epoch {})".format(
                    args.resume, checkpoint['epoch']), log)
        else:
            print_log("=> no checkpoint found at '{}'".format(args.resume),
                      log)
    else:
        print_log(
            "=> did not use any checkpoint for {} model".format(args.arch),
            log)

    if args.evaluate:
        validate(test_loader, net, criterion, log)
        return

    # Main loop
    start_time = time.time()
    epoch_time = AverageMeter()

    al_steps = int(50000 / args.alinit)

    curr_al_step = 0
    dump_data = []

    for (al_step, epoch) in [(a, b) for a in range(al_steps)
                             for b in range(args.start_epoch, args.epochs)]:
        print(" Current AL_step and epoch " + str((al_step, epoch)))
        if (al_step != curr_al_step):

            # score() returns a score for every data point in the unlabelled dataset;
            # the selected positions are then mapped back to the original indices
            # (0..49999) of the full training set.
            curr_al_step = al_step
            #Resetting the learning rate scheduler
            scheduler = lr_scheduler.MultiStepLR(optimizer,
                                                 milestones,
                                                 gamma=0.1)

            scores_unlabelled = score(unlabelled_dset, net, criterion)
            indices_sorted = np.argsort(scores_unlabelled)

            #Greedy Sampling
            temp_selections = indices_sorted[-1 * args.alinit:]
            selections = np.asarray(list(
                unlabelled_dset.indices))[temp_selections].tolist()

            annot_indices.append(selections)

            unannot_indices.append(
                set(unannot_indices[-1]) - set(annot_indices[-1]))

            labelled_dset = torch.utils.data.Subset(train_data,
                                                    annot_indices[-1])
            labelled_loader = torch.utils.data.DataLoader(
                labelled_dset,
                batch_size=args.batch_size,
                shuffle=True,
                num_workers=args.workers,
                pin_memory=True)
            unlabelled_dset = torch.utils.data.Subset(train_data,
                                                      unannot_indices[-1])

            indices_data = [annot_indices, unannot_indices]
            filehandler = open("indices.pickle", "wb")
            pickle.dump(indices_data, filehandler)
            filehandler.close()

        #current_learning_rate = adjust_learning_rate(optimizer, epoch, args.gammas, args.schedule)
        current_learning_rate = float(scheduler.get_lr()[-1])
        #print('lr:',current_learning_rate)

        scheduler.step()

        #adjust_learning_rate(optimizer, epoch)

        need_hour, need_mins, need_secs = convert_secs2time(
            epoch_time.avg * (args.epochs - epoch))
        need_time = '[Need: {:02d}:{:02d}:{:02d}]'.format(
            need_hour, need_mins, need_secs)

        print_log('\n==>>{:s} [Epoch={:03d}/{:03d}] {:s} [learning_rate={:.6f}]'.format(time_string(), epoch, args.epochs, need_time, current_learning_rate) \
                    + ' [Best : Accuracy={:.2f}, Error={:.2f}]'.format(recorder.max_accuracy(False), 100-recorder.max_accuracy(False)), log)

        # train for one epoch
        #train_acc, train_los = train(train_loader, net, criterion, optimizer, epoch, log)
        train_acc, train_los = train(labelled_loader, net, criterion,
                                     optimizer, epoch, log)

        # evaluate on validation set
        #val_acc,   val_los   = extract_features(test_loader, net, criterion, log)
        val_acc, val_los = validate(test_loader, net, criterion, log)
        is_best = recorder.update(epoch, train_los, train_acc, val_los,
                                  val_acc)

        dump_data.append(([al_step, epoch], [train_acc,
                                             train_los], [val_acc, val_los]))
        if (epoch % 50 == 0):
            filehandler = open("accuracy.pickle", "wb")
            pickle.dump(dump_data, filehandler)
            filehandler.close()

        if epoch == 180:
            save_checkpoint(
                {
                    'epoch': epoch,
                    'arch': args.arch,
                    'state_dict': net.state_dict(),
                    'recorder': recorder,
                    'optimizer': optimizer.state_dict(),
                }, False, args.save_path,
                'checkpoint_{0}_{1}.pth.tar'.format(epoch,
                                                    time_stamp), time_stamp)

        save_checkpoint(
            {
                'epoch': epoch + 1,
                'arch': args.arch,
                'state_dict': net.state_dict(),
                'recorder': recorder,
                'optimizer': optimizer.state_dict(),
            }, is_best, args.save_path,
            'checkpoint_{0}.pth.tar'.format(time_stamp), time_stamp)

        # measure elapsed time
        epoch_time.update(time.time() - start_time)
        start_time = time.time()
        recorder.plot_curve(
            os.path.join(
                args.save_path,
                'training_plot_{0}_{1}.png'.format(args.manualSeed,
                                                   time_stamp)))

    writer.close()
    log.close()
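
One subtle step in the active-learning loop above is that score() ranks samples by their position inside the unlabelled Subset, so the greedily selected positions must be mapped back to indices of the full training set through unlabelled_dset.indices before being appended to annot_indices. Below is a tiny sketch of that mapping with dummy scores; all names and numbers are illustrative.

import numpy as np
import torch
from torch.utils.data import Subset, TensorDataset

# toy dataset with 10 samples whose global indices are simply 0..9
full_data = TensorDataset(torch.arange(10).float().unsqueeze(1))
unlabelled_indices = [1, 3, 4, 6, 8, 9]
unlabelled_dset = Subset(full_data, unlabelled_indices)

# dummy acquisition scores, one per element of the unlabelled subset
scores_unlabelled = np.array([0.2, 0.9, 0.1, 0.7, 0.5, 0.3])

budget = 2
temp_selections = np.argsort(scores_unlabelled)[-budget:]            # positions inside the subset
selections = np.asarray(list(unlabelled_dset.indices))[temp_selections].tolist()
print(selections)   # global indices of the two highest-scoring samples: [6, 3]
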
Example #30
0
File: train.py  Project: syt2/CRA
def train(cfg, writer, logger):
    # CUDA_VISIBLE_DEVICES must be set before PyTorch initializes CUDA
    use_cuda = False
    if cfg.get("cuda", None) is not None:
        if cfg.get("cuda", None) != "all":
            os.environ["CUDA_VISIBLE_DEVICES"] = cfg.get("cuda", None)
        use_cuda = torch.cuda.is_available()

    # Setup random seed
    seed = cfg["training"].get("seed", random.randint(1, 10000))
    torch.manual_seed(seed)
    if use_cuda:
        torch.cuda.manual_seed(seed)
    np.random.seed(seed)
    random.seed(seed)

    # Setup Dataloader
    train_loader, val_loader = get_loader(cfg)

    # Setup Model
    model = get_model(cfg)
    # writer.add_graph(model, torch.rand([1, 3, 224, 224]))
    if use_cuda and torch.cuda.device_count() > 1:
        model = torch.nn.DataParallel(model,
                                      device_ids=list(
                                          range(torch.cuda.device_count())))

    # Setup optimizer, lr_scheduler and loss function
    optimizer = get_optimizer(model.parameters(), cfg)
    scheduler = get_scheduler(optimizer, cfg)
    loss_fn = get_loss_fn(cfg)

    # Setup Metrics
    epochs = cfg["training"]["epochs"]
    recorder = RecorderMeter(epochs)
    start_epoch = 0

    # save model parameters every <n> epochs
    save_interval = cfg["training"]["save_interval"]

    if use_cuda:
        model.cuda()
        loss_fn.cuda()

    # Resume Trained Model
    resume_path = os.path.join(writer.file_writer.get_logdir(),
                               cfg["training"]["resume"])
    best_path = os.path.join(writer.file_writer.get_logdir(),
                             cfg["training"]["best_model"])

    if cfg["training"]["resume"] is not None:
        if os.path.isfile(resume_path):
            logger.info(
                "Loading model and optimizer from checkpoint '{}'".format(
                    resume_path))
            checkpoint = torch.load(resume_path)
            state = checkpoint["state_dict"]
            if torch.cuda.device_count() <= 1:
                state = convert_state_dict(state)
            model.load_state_dict(state)
            optimizer.load_state_dict(checkpoint["optimizer"])
            scheduler.load_state_dict(checkpoint["scheduler"])
            start_epoch = checkpoint["epoch"]
            recorder = checkpoint['recorder']
            logger.info("Loaded checkpoint '{}' (epoch {})".format(
                resume_path, checkpoint["epoch"]))
        else:
            logger.info("No checkpoint found at '{}'".format(resume_path))

    epoch_time = AverageMeter()
    for epoch in range(start_epoch, epochs):
        start_time = time.time()
        need_hour, need_mins, need_secs = convert_secs2time(epoch_time.avg *
                                                            (epochs - epoch))
        need_time = '[Need: {:02d}:{:02d}:{:02d}]'.format(
            need_hour, need_mins, need_secs)
        logger.info(
            '\n==>>{:s} [Epoch={:03d}/{:03d}] {:s} [learning_rate={:8.6f}]'.format(
                time_string(), epoch, epochs, need_time,
                optimizer.param_groups[0]['lr'])  # use scheduler.get_last_lr() on PyTorch >= 1.4
            + ' [Best : Accuracy={:.2f}]'.format(recorder.max_accuracy(False)))
        train_acc, train_los = train_epoch(train_loader, model, loss_fn,
                                           optimizer, use_cuda, logger)
        val_acc, val_los = validate_epoch(val_loader, model, loss_fn, use_cuda,
                                          logger)
        scheduler.step()

        is_best = recorder.update(epoch, train_los, train_acc, val_los,
                                  val_acc)
        if is_best or epoch % save_interval == 0 or epoch == epochs - 1:  # save model (resume model and best model)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'recorder': recorder,
                    'state_dict': model.state_dict(),
                    'optimizer': optimizer.state_dict(),
                    'scheduler': scheduler.state_dict(),
                }, is_best, best_path, resume_path)

            for name, param in model.named_parameters():  # save histogram
                writer.add_histogram(name,
                                     param.clone().cpu().data.numpy(), epoch)

        writer.add_scalar('Train/loss', train_los, epoch)  # save curves
        writer.add_scalar('Train/acc', train_acc, epoch)
        writer.add_scalar('Val/loss', val_los, epoch)
        writer.add_scalar('Val/acc', val_acc, epoch)

        epoch_time.update(time.time() - start_time)

    writer.close()
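
convert_state_dict here plays the same role as remove_module_dict in the earlier landmark-evaluation example: checkpoints saved from a DataParallel model prefix every key with 'module.', which must be stripped before loading into a single-GPU model. A minimal sketch of such a helper:

from collections import OrderedDict

def convert_state_dict(state_dict):
    """Strip the 'module.' prefix that torch.nn.DataParallel adds to parameter names,
    so the checkpoint can be loaded into a plain (single-GPU) model."""
    new_state_dict = OrderedDict()
    for key, value in state_dict.items():
        new_key = key[len('module.'):] if key.startswith('module.') else key
        new_state_dict[new_key] = value
    return new_state_dict
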
Example #31
0
File: main.py  Project: AJSVB/GPBT
    def step(self):
        log = open(
            os.path.join(
                args.save_path,
                'log_seed_{0}_{1}.txt'.format(args.manualSeed,
                                              self.time_stamp)), 'a')

        start_time = time.time()
        epoch_time = AverageMeter()
        #current_learning_rate = adjust_learning_rate(optimizer, epoch, args.gammas, args.schedule)
        #current_learning_rate = float(self.scheduler.get_last_lr()[-1])
        #print('lr:',current_learning_rate)

        #self.scheduler.step()

        #adjust_learning_rate(optimizer, epoch)

        need_hour, need_mins, need_secs = convert_secs2time(
            epoch_time.avg * (args.epochs - self.i))
        need_time = '[Need: {:02d}:{:02d}:{:02d}]'.format(
            need_hour, need_mins, need_secs)

        print_log('\n==>>{:s} [Epoch={:03d}/{:03d}] {:s} [learning_rate={:.6f}]'.format(time_string(), self.i, args.epochs, need_time, self.args['lr']) \
                    + ' [Best : Accuracy={:.2f}, Error={:.2f}]'.format(self.recorder.max_accuracy(False), 100-self.recorder.max_accuracy(False)),log)

        train_acc, train_los = self.train1()
        val_acc, val_los = self.val1()
        is_best = self.recorder.update(self.i - 1, train_los, train_acc,
                                       val_los, val_acc)

        #  save_checkpoint({
        #    'epoch': self.i,
        #    'arch': args.arch,
        #    'state_dict': self.net.state_dict(),
        #    'recorder': self.recorder,
        #    'optimizer' : self.optimizer.state_dict(),
        #  }, is_best, args.save_path, 'checkpoint_{0}.pth.tar'.format(self.time_stamp), self.time_stamp)

        # measure elapsed time
        epoch_time.update(time.time() - start_time)
        start_time = time.time()
        #self.recorder.plot_curve( os.path.join(args.save_path, 'training_plot_{0}_{1}.png'.format(args.manualSeed, self.time_stamp)) )
        log.close()
        return train_acc, train_los, val_acc, val_los
Example #32
0
def main():
  if not os.path.isdir(args.save_path): os.makedirs(args.save_path)
  log = open(os.path.join(args.save_path, 'log_seed_{}.txt'.format(args.manualSeed)), 'w')
  print_log('save path : {}'.format(args.save_path), log)
  state = {k: v for k, v in args._get_kwargs()}
  print_log(state, log)
  print_log("Random Seed: {}".format(args.manualSeed), log)
  print_log("python version : {}".format(sys.version.replace('\n', ' ')), log)
  print_log("torch  version : {}".format(torch.__version__), log)
  print_log("cudnn  version : {}".format(torch.backends.cudnn.version()), log)

  # Init dataset
  if not os.path.isdir(args.data_path):
    os.makedirs(args.data_path)

  if args.dataset == 'cifar10':
    mean = [x / 255 for x in [125.3, 123.0, 113.9]]
    std = [x / 255 for x in [63.0, 62.1, 66.7]]
  elif args.dataset == 'cifar100':
    mean = [x / 255 for x in [129.3, 124.1, 112.4]]
    std = [x / 255 for x in [68.2, 65.4, 70.4]]
  else:
    assert False, "Unknow dataset : {}".format(args.dataset)

  train_transform = transforms.Compose(
    [transforms.RandomHorizontalFlip(), transforms.RandomCrop(32, padding=4), transforms.ToTensor(),
     transforms.Normalize(mean, std)])
  test_transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize(mean, std)])

  if args.dataset == 'cifar10':
    train_data = dset.CIFAR10(args.data_path, train=True, transform=train_transform, download=True)
    test_data = dset.CIFAR10(args.data_path, train=False, transform=test_transform, download=True)
    num_classes = 10
  elif args.dataset == 'cifar100':
    train_data = dset.CIFAR100(args.data_path, train=True, transform=train_transform, download=True)
    test_data = dset.CIFAR100(args.data_path, train=False, transform=test_transform, download=True)
    num_classes = 100
  elif args.dataset == 'svhn':
    train_data = dset.SVHN(args.data_path, split='train', transform=train_transform, download=True)
    test_data = dset.SVHN(args.data_path, split='test', transform=test_transform, download=True)
    num_classes = 10
  elif args.dataset == 'stl10':
    train_data = dset.STL10(args.data_path, split='train', transform=train_transform, download=True)
    test_data = dset.STL10(args.data_path, split='test', transform=test_transform, download=True)
    num_classes = 10
  elif args.dataset == 'imagenet':
    assert False, 'ImageNet support is not implemented yet'
  else:
    assert False, 'Unsupported dataset : {}'.format(args.dataset)

  train_loader = torch.utils.data.DataLoader(train_data, batch_size=args.batch_size, shuffle=True,
                         num_workers=args.workers, pin_memory=True)
  test_loader = torch.utils.data.DataLoader(test_data, batch_size=args.batch_size, shuffle=False,
                        num_workers=args.workers, pin_memory=True)

  # Init model, criterion, and optimizer
  #net = models.__dict__[args.arch](num_classes).cuda()
  net = SENet34()

  # define loss function (criterion) and optimizer
  criterion = F.nll_loss
  optimizer = torch.optim.SGD(net.parameters(), state['learning_rate'], momentum=state['momentum'],
                weight_decay=state['decay'], nesterov=True)

  if args.use_cuda: net.cuda()

  recorder = RecorderMeter(args.epochs)
  # optionally resume from a checkpoint
  if args.resume:
    if os.path.isfile(args.resume):
      print_log("=> loading checkpoint '{}'".format(args.resume), log)
      checkpoint = torch.load(args.resume)
      recorder = checkpoint['recorder']
      args.start_epoch = checkpoint['epoch']
      net.load_state_dict(checkpoint['state_dict'])
      optimizer.load_state_dict(checkpoint['optimizer'])
      print_log("=> loaded checkpoint '{}' (epoch {})" .format(args.resume, checkpoint['epoch']), log)
    else:
      print_log("=> no checkpoint found at '{}'".format(args.resume), log)
  else:
    print_log("=> do not use any checkpoint for model", log)

  if args.evaluate:
    validate(test_loader, net, criterion, log)
    return

  # Main loop
  start_time = time.time()
  epoch_time = AverageMeter()
  for epoch in range(args.start_epoch, args.epochs):
    current_learning_rate = adjust_learning_rate(optimizer, epoch, args.gammas, args.schedule)

    need_hour, need_mins, need_secs = convert_secs2time(epoch_time.avg * (args.epochs-epoch))
    need_time = '[Need: {:02d}:{:02d}:{:02d}]'.format(need_hour, need_mins, need_secs)

    print_log('\n==>>{:s} [Epoch={:03d}/{:03d}] {:s} [learning_rate={:6.4f}]'.format(time_string(), epoch, args.epochs, need_time, current_learning_rate) \
                + ' [Best : Accuracy={:.2f}, Error={:.2f}]'.format(recorder.max_accuracy(False), 100-recorder.max_accuracy(False)), log)

    # train for one epoch
    train_acc, train_los = train(train_loader, net, criterion, optimizer, epoch, log)

    # evaluate on validation set
    val_acc,   val_los   = validate(test_loader, net, criterion, log)
    is_best = recorder.update(epoch, train_los, train_acc, val_los, val_acc)

    save_checkpoint({
      'epoch': epoch + 1,
      'state_dict': net.state_dict(),
      'recorder': recorder,
      'optimizer' : optimizer.state_dict(),
    }, is_best, args.save_path, 'checkpoint.pth.tar')

    # measure elapsed time
    epoch_time.update(time.time() - start_time)
    start_time = time.time()
    recorder.plot_curve( os.path.join(args.save_path, 'curve.png') )

  log.close()
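
save_checkpoint, used in nearly every example above, typically writes the state dictionary to the given filename and keeps a separate copy of the best model when is_best is set. Below is a hedged sketch of a common implementation; some of the scripts above pass extra arguments such as a log handle or a time stamp.

import os
import shutil
import torch

def save_checkpoint(state, is_best, save_path, filename):
    """Persist the latest training state and keep a separate copy of the best model."""
    filepath = os.path.join(save_path, filename)
    torch.save(state, filepath)
    if is_best:
        shutil.copyfile(filepath, os.path.join(save_path, 'model_best.pth.tar'))
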