Example #1
0
def evaluate_two_stage(model, test_batch, args):
    """Evaluate a two-stage (reconstruction + classifier) model on full frames.

    Runs ``model`` in eval mode over ``test_batch``, collects per-sample
    pixel losses (converted to PSNR-based scores), classifier logits and
    labels, then computes a frame-level ROC-AUC using the logits alone.

    Args:
        model: network exposing ``forward(x, gt=..., label=..., train=False)``
            returning ``(reconstructed_image, loss_dict, logit)`` where
            ``loss_dict['pixel_loss']`` is a per-pixel loss tensor.
        test_batch: iterable yielding ``(images, labels)`` batches.
        args: namespace; uses ``args.label`` and ``args.c``.

    Returns:
        Tuple ``(auc, avg_pixel_loss)``.
    """
    avg_loss = metric.AverageMeter('avg_loss', ':.4e')
    single_time = metric.AverageMeter('Time', ':6.3f')
    progress = metric.ProgressMeter(len(test_batch),
                                    avg_loss,
                                    single_time,
                                    prefix="Evaluation: ")
    model.eval()

    label_list = []
    psnr_list = []
    logit_list = []
    counter = 0
    for k, (images, labels) in enumerate(test_batch):
        images = images.cuda(non_blocking=True)
        labels = labels.cuda(non_blocking=True)
        counter += 1

        # NOTE(review): if args.label is falsy, label becomes None and the
        # assert below (len(label)) raises TypeError — presumably args.label
        # is always set for this evaluation path; confirm.
        label = labels if args.label else None
        channel = (images.size()[1] // args.c - 1) * args.c
        # input_image = images[:, 0:channel]
        # target_image = images[:, channel:]
        # The full clip is fed as both input and reconstruction target
        # (the channel split above is intentionally disabled).
        input_image = images.detach()
        target_image = images.detach()

        with autocast():
            reconstructed_image, loss, logit = model.forward(input_image,
                                                             gt=target_image,
                                                             label=label,
                                                             train=False)
            # Reduce the per-pixel loss map to one scalar per sample.
            loss = loss['pixel_loss'].view(loss['pixel_loss'].shape[0],
                                           -1).mean(1)

        assert len(loss) == len(
            label
        ), "During inference, loss sample number must match label sample number."
        for i in range(len(loss)):
            psnr_list.append(psnr(loss[i].item()))
            logit_list.append(logit[i].item())
            label_list.append(label[i].item())
            avg_loss.update(loss[i].item(), 1)

    # psnr_score_list presumably normalizes PSNR values into [0, 1] scores
    # over the whole run — TODO confirm against its definition.
    psnr_score_total_list = np.asarray(psnr_score_list(psnr_list))
    label_list = np.asarray(label_list)
    logit_list = np.asarray(logit_list)
    assert psnr_score_total_list.size == label_list.size, "INFERENCE LENGTH MUST MATCH LABEL LENGTH."

    # final_score = 0.8 * logit_list + 0.2 * (1 - psnr_score_total_list)
    # Only the classifier logit is used as the anomaly score here.
    final_score = logit_list
    accuracy = roc_auc_score(y_true=label_list, y_score=final_score)
    # plot_AUC(psnr_score_total_list, np.expand_dims(1 - labels_list, 0))
    print("EVALUATE FRAME NUMBER: ", psnr_score_total_list.size)
    return accuracy, avg_loss.avg
Example #2
0
def validate(val_loader, model, args, streams=None):
    """Validate ``model`` on ``val_loader`` and report top-1/top-5 accuracy.

    Runs the model in eval mode (under AMP autocast when ``args.is_amp``),
    records the top-1 accuracy of each small sub-network plus the top-1/top-5
    accuracy of the averaged ensemble output, and prints progress every
    ``args.print_freq`` batches.

    Args:
        val_loader: iterable of ``(images, target)`` batches.
        model: returns ``(ensemble_output, outputs, ce_loss)`` in 'val' mode,
            where ``outputs[j]`` holds the j-th sub-network's logits.
        args: namespace; uses ``gpu``, ``is_amp``, ``loop_factor``,
            ``print_freq``.
        streams: unused here; kept for signature compatibility with callers.

    Returns:
        list ``[ensemble_top1, ensemble_top5, net0_top1, net1_top1, ...]``.
    """
    batch_time = metric.AverageMeter('Time', ':6.3f')
    avg_ce_loss = metric.AverageMeter('ce_loss', ':.4e')

    # record the top1 accuracy of each small network
    top1_all = []
    for i in range(args.loop_factor):
        top1_all.append(metric.AverageMeter('{}_Acc@1'.format(i), ':6.2f'))
    avg_top1 = metric.AverageMeter('Avg_Acc@1', ':6.2f')
    # BUG FIX: this meter was mislabeled 'Avg_Acc@1'; it tracks top-5.
    avg_top5 = metric.AverageMeter('Avg_Acc@5', ':6.2f')
    progress = metric.ProgressMeter(len(val_loader),
                                    batch_time,
                                    avg_ce_loss,
                                    *top1_all,
                                    avg_top1,
                                    avg_top5,
                                    prefix='Test: ')

    # switch to evaluate mode
    model.eval()

    with torch.no_grad():
        end = time.time()
        for i, (images, target) in enumerate(val_loader):
            if args.gpu is not None:
                images = images.cuda(args.gpu, non_blocking=True)
            target = target.cuda(args.gpu, non_blocking=True)

            # compute outputs and losses (optionally with mixed precision)
            if args.is_amp:
                with amp.autocast():
                    ensemble_output, outputs, ce_loss = model(images,
                                                              target=target,
                                                              mode='val')
            else:
                ensemble_output, outputs, ce_loss = model(images,
                                                          target=target,
                                                          mode='val')

            # measure accuracy and record loss
            batch_size_now = images.size(0)
            for j in range(args.loop_factor):
                acc1, acc5 = metric.accuracy(outputs[j, ...],
                                             target,
                                             topk=(1, 5))
                top1_all[j].update(acc1[0].item(), batch_size_now)

            # simply average outputs of small networks
            avg_acc1, avg_acc5 = metric.accuracy(ensemble_output,
                                                 target,
                                                 topk=(1, 5))
            avg_top1.update(avg_acc1[0].item(), batch_size_now)
            avg_top5.update(avg_acc5[0].item(), batch_size_now)

            avg_ce_loss.update(ce_loss.mean().item(), batch_size_now)

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % args.print_freq == 0:
                progress.print(i)

        # Summary: ensemble metrics first, then each sub-network's top-1.
        acc_all = []
        acc_all.append(avg_top1.avg)
        acc_all.append(avg_top5.avg)
        acc_info = '* Acc@1 {:.3f} Acc@5 {:.3f}'.format(acc_all[0], acc_all[1])
        for j in range(args.loop_factor):
            acc_all.append(top1_all[j].avg)
            acc_info += '\t {}_acc@1 {:.3f}'.format(j, top1_all[j].avg)

        print(acc_info)

    # torch.cuda.empty_cache()
    return acc_all
Example #3
0
def train(train_loader,
          model,
          optimizer,
          scheduler,
          epoch,
          args,
          streams=None,
          scaler=None):
    """training function

    Runs one epoch with data prefetching, per-iteration LR scheduling,
    optional AMP, and gradient accumulation over
    ``args.iters_to_accumulate`` iterations.

    Args:
        train_loader: iterable of ``(images, target)`` batches.
        model: returns ``(ensemble_output, outputs, ce_loss, cot_loss)``
            when called with ``mode='train'``.
        optimizer: stepped once every ``args.iters_to_accumulate`` iters.
        scheduler: callable ``scheduler(optimizer, iteration, epoch)``
            that adjusts the learning rate each iteration.
        epoch: current epoch index (used for logging and by the model).
        args: namespace; uses ``loop_factor``, ``is_amp``,
            ``iters_to_accumulate``, ``print_freq``,
            ``multiprocessing_distributed``, ``rank``, ``ngpus_per_node``.
        streams: optional CUDA streams forwarded to the model.
        scaler: GradScaler; required when ``args.is_amp`` is set.
    """
    batch_time = metric.AverageMeter('Time', ':6.3f')
    data_time = metric.AverageMeter('Data', ':6.3f')
    avg_ce_loss = metric.AverageMeter('ce_loss', ':.4e')
    avg_cot_loss = metric.AverageMeter('cot_loss', ':.4e')

    # record the top1 accuray of each small network
    top1_all = []
    for i in range(args.loop_factor):
        # ce_losses_l.append(metric.AverageMeter('{}_CE_Loss'.format(i), ':.4e'))
        top1_all.append(metric.AverageMeter('{}_Acc@1'.format(i), ':6.2f'))
    avg_top1 = metric.AverageMeter('Avg_Acc@1', ':6.2f')
    #if args.dataset == 'imagenet':
    #	avg_top5 = metric.AverageMeter('Avg_Acc@1', ':6.2f')

    # show all
    total_iters = len(train_loader)
    progress = metric.ProgressMeter(total_iters,
                                    batch_time,
                                    data_time,
                                    avg_ce_loss,
                                    avg_cot_loss,
                                    *top1_all,
                                    avg_top1,
                                    prefix="Epoch: [{}]".format(epoch))

    # switch to train mode
    model.train()
    end = time.time()

    # prefetch data
    prefetcher = prefetch.data_prefetcher(train_loader)
    images, target = prefetcher.next()
    i = 0
    """Another way to load the data
	for i, (images, target) in enumerate(train_loader):
	
		# measure data loading time
		data_time.update(time.time() - end)

		if args.gpu is not None:
			images = images.cuda(args.gpu, non_blocking=True)
		target = target.cuda(args.gpu, non_blocking=True)
	"""
    optimizer.zero_grad()
    while images is not None:
        # measure data loading time
        data_time.update(time.time() - end)
        # adjust the lr first
        scheduler(optimizer, i, epoch)
        i += 1

        # compute outputs and losses
        if args.is_amp:
            # Runs the forward pass with autocasting.
            with amp.autocast():
                ensemble_output, outputs, ce_loss, cot_loss = model(
                    images,
                    target=target,
                    mode='train',
                    epoch=epoch,
                    streams=streams)
        else:
            ensemble_output, outputs, ce_loss, cot_loss = model(
                images,
                target=target,
                mode='train',
                epoch=epoch,
                streams=streams)

        # measure accuracy and record loss
        batch_size_now = images.size(0)
        # notice the index i and j, avoid contradictory
        for j in range(args.loop_factor):
            acc1 = metric.accuracy(outputs[j, ...], target, topk=(1, ))
            top1_all[j].update(acc1[0].item(), batch_size_now)

        # simply average outputs of small networks
        avg_acc1 = metric.accuracy(ensemble_output, target, topk=(1, ))
        avg_top1.update(avg_acc1[0].item(), batch_size_now)
        # avg_top5.update(avg_acc1[0].item(), batch_size_now)

        avg_ce_loss.update(ce_loss.mean().item(), batch_size_now)
        avg_cot_loss.update(cot_loss.mean().item(), batch_size_now)

        # compute gradient and do SGD step
        # Scale down so gradients accumulated over several iterations
        # average to the same magnitude as a single large batch.
        total_loss = (ce_loss + cot_loss) / args.iters_to_accumulate

        if args.is_amp:
            # Scales loss.  Calls backward() on scaled loss to create scaled gradients.
            # Backward passes under autocast are not recommended.
            # Backward ops run in the same dtype autocast chose for corresponding forward ops.
            scaler.scale(total_loss).backward()

            if i % args.iters_to_accumulate == 0 or i == total_iters:
                # scaler.step() first unscales the gradients of the optimizer's assigned params.
                # If these gradients do not contain infs or NaNs, optimizer.step() is then called,
                # otherwise, optimizer.step() is skipped.
                scaler.step(optimizer)
                # Updates the scale for next iteration.
                scaler.update()
                optimizer.zero_grad()
        else:
            total_loss.backward()
            if i % args.iters_to_accumulate == 0 or i == total_iters:
                optimizer.step()
                optimizer.zero_grad()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        # only one process per node prints progress
        if not args.multiprocessing_distributed or (args.rank %
                                                    args.ngpus_per_node == 0):
            if i % (args.print_freq * args.iters_to_accumulate) == 0:
                progress.print(i)
        images, target = prefetcher.next()
Example #4
0
def evaluate_two_stage_object(model, test_batch, args):
    """Evaluate the two-stage model on object patches cropped from frames.

    Each frame is split into object patches via ``get_object_images``; the
    model scores every patch, and a frame's score is the max over its
    patches. The frame-level anomaly score mixes the classifier logit with
    ``1 - normalized PSNR`` before computing ROC-AUC against frame labels.
    Frames with no detected objects get a default PSNR of 100 (i.e. normal).

    Args:
        model: network whose ``forward(..., train=False)`` returns
            ``(reconstructed_image, loss_dict, logit)``.
        test_batch: iterable of ``(images, labels, bboxes)``.
        args: namespace; uses ``label``, ``c``, ``evaluate_time``.

    Returns:
        Tuple ``(auc, avg_patch_loss)``.
    """
    avg_loss = metric.AverageMeter('avg_loss', ':.4e')
    single_time = metric.AverageMeter('Time', ':6.3f')
    progress = metric.ProgressMeter(len(test_batch),
                                    avg_loss,
                                    single_time,
                                    prefix="Evaluation: ")

    model.eval()
    label_list = []
    psnr_list = []
    logit_list = []
    ct = 0  # total number of object patches scored
    counter = 0
    for k, (images, labels, bboxes) in enumerate(test_batch):
        images = images.cuda(non_blocking=True)
        labels = labels.cuda(non_blocking=True)
        bboxes = [x.cuda(non_blocking=True) for x in bboxes]
        a = time.time()
        counter += 1

        patches, patch_labels, bbox_num = get_object_images(
            images, labels, bboxes, args)  # [K,C,H,W] [K] [B]

        if patches is None:
            # No objects in this batch: treat every frame as perfectly
            # reconstructed (PSNR 100). NOTE(review): no logit is appended
            # here, yet the final assert requires logit_list to match
            # label_list in length — confirm this branch is unreachable
            # when args.label is set.
            for i in range(len(labels)):
                label_list.append(labels[i].item())
                psnr_list.append(100.0)
        else:
            del images
            batch_size_now = len(bbox_num)
            ct += patches.size()[0]

            label = labels if args.label else None
            # Split the patch channels into input clip and target frame.
            channel = (patches.size()[1] // args.c - 1) * args.c
            input_image = patches[:, 0:channel]
            target_image = patches[:, channel:]

            with autocast():
                reconstructed_image, loss, logit = model.forward(
                    input_image, gt=target_image, label=label, train=False)
                # Per-patch scalar loss.
                loss = loss['pixel_loss'].view(loss['pixel_loss'].shape[0],
                                               -1).mean(1)
            assert len(loss) == len(
                label
            ), "During inference, loss sample number must match label sample number."

            # Aggregate patch scores back to frame level (max over the
            # frame's patches).
            start_ = 0
            for i, num_ in enumerate(bbox_num):  # per sample in batch
                logit_per_sample = torch.max(
                    logit[start_:start_ + num_]).item() if num_ > 0 else 0
                loss_per_sample = torch.max(
                    loss[start_:start_ + num_]).item() if num_ > 0 else 0
                psnr_list.append(psnr(loss_per_sample))  # TODO: Max or Mean
                logit_list.append(logit_per_sample)
                label_list.append(labels[i].item())
                avg_loss.update(loss_per_sample, batch_size_now)
                start_ += num_

            assert start_ == logit.size(
            )[0], "patch num and bbox_num doesn't match"

            if args.evaluate_time:
                single_time.update((time.time() - a) * 1000)
                progress.print(counter)
                # print("Single batch time cost {}ms, loss {}".format(1000*(time.time()-a), loss.mean().item()))

    psnr_score_total_list = np.asarray(psnr_score_list(psnr_list))
    label_list = np.asarray(label_list)
    logit_list = np.asarray(logit_list)
    assert psnr_score_total_list.size == label_list.size and psnr_score_total_list.size == logit_list.size, "INFERENCE LENGTH MUST MATCH LABEL LENGTH."

    # Weighted mix of classifier logit and reconstruction-quality score.
    final_score = 0.1 * logit_list + 0.9 * (1 - psnr_score_total_list)
    # final_score = logit_list
    accuracy = roc_auc_score(y_true=label_list, y_score=final_score)
    # accuracy1 = roc_auc_score(y_true=label_list, y_score=1-psnr_score_total_list)
    # plot_AUC(psnr_score_total_list, np.expand_dims(1 - label_list, 0))
    print("EVAL FRAME & BOX NUMBER & ACC : ", psnr_score_total_list.size, ct,
          accuracy * 100)

    return accuracy, avg_loss.avg
Example #5
0
def train_D(train_batch, model, D, optimizer, optimizer_D, epoch, args):
    """Jointly train the reconstruction model and a WGAN-style critic ``D``.

    Per batch: (1) forward the generator under autocast and compute the
    pixel loss; (2) from epoch 1 onward, add an adversarial term from the
    critic's score of the reconstruction residual; (3) train the critic on
    the detached residual, with pseudo-labels derived by thresholding the
    per-sample pixel loss. Both optimizers use separate GradScalers
    (``args.scaler`` / ``args.scaler_D``).

    Args:
        train_batch: DataLoader of ``(images, labels[, bboxes])``.
        model: generator; ``forward(..., train=True)`` returns
            ``(reconstructed_image, loss_dict)``.
        D: critic; ``D(image, label, train=True)`` returns ``(loss, logit)``.
        optimizer: generator optimizer.
        optimizer_D: critic optimizer.
        epoch: current epoch (gates the adversarial term and the threshold).
        args: namespace; uses ``object_detection``, ``label``, ``c``,
            ``visualize_input``, ``gradient_clip``, ``scaler``, ``scaler_D``,
            ``visualize``, ``rank``, ``ngpus_per_node``, ``print_freq``,
            ``batch_size``.

    Returns:
        Average generator loss for the epoch.
    """
    batch_time = metric.AverageMeter('Time', ':6.3f')
    data_time = metric.AverageMeter('Data', ':6.3f')
    avg_loss = metric.AverageMeter('avg_loss', ':.4e')
    avg_loss_D = metric.AverageMeter('avg_loss_D', ':.4e')
    progress = metric.ProgressMeter(len(train_batch),
                                    batch_time,
                                    data_time,
                                    avg_loss,
                                    avg_loss_D,
                                    prefix="Epoch: [{}]".format(epoch))

    model.train()
    D.train()
    end = time.time()

    # Choose prefetcher variant depending on whether bboxes are provided.
    if args.object_detection:
        prefetcher = prefetch.data_prefetcher_trible(train_batch)
        images, labels, bboxes = prefetcher.next()
    else:
        prefetcher = prefetch.data_prefetcher(train_batch)
        images, labels = prefetcher.next()
        bboxes = None

    optimizer.zero_grad()
    optimizer_D.zero_grad()
    counter = -1
    while images is not None:
        data_time.update(time.time() - end)
        counter += 1

        # whether split into object
        if args.object_detection:
            # 5 - 10 ms
            patches, labels, bbox_num = get_object_images(
                images, labels, bboxes, args)  # [K,C,H,W] [K] [B]
            del images
            batch_size_now = len(bbox_num)
        else:
            patches = images
            batch_size_now = images.size()[0]

        if patches is None:  # prevent no input
            if args.object_detection:
                images, labels, bboxes = prefetcher.next()
            else:
                images, labels = prefetcher.next()
                bboxes = None

            continue

        # NOTE(review): if args.label is falsy, label is None and
        # label.sum() below raises AttributeError — confirm args.label is
        # always set for this training path.
        label = labels if args.label else None
        assert label.sum() == 0, "training label must equal to zero"

        # Split patch channels into input clip and reconstruction target.
        channel = (patches.size()[1] // args.c - 1) * args.c
        input_image = patches[:, 0:channel]
        target_image = patches[:, channel:]

        if args.visualize_input:
            _ = visualize_single(target_image)

        optimizer.zero_grad()
        optimizer_D.zero_grad()

        # G Loss
        with autocast():
            reconstructed_image, loss = model.forward(input_image,
                                                      gt=target_image,
                                                      label=label,
                                                      train=True)
            # loss = sum(loss.values())
            # loss_bak keeps the per-sample loss for thresholding below;
            # loss is the scalar mean used for backprop.
            loss_bak = loss['pixel_loss'].view(loss['pixel_loss'].shape[0],
                                               -1).mean(1)
            loss = loss['pixel_loss'].mean()

        # WGAN: clamp critic weights to enforce the Lipschitz constraint.
        weight_cliping_limit = 0.01
        for p in D.parameters():
            p.data.clamp_(-weight_cliping_limit, weight_cliping_limit)

        # loss_bak n;  reconstructed_image n,3,h,w; target_image n,3,h,w; input_image n,6,h,w;
        b, c, h, w = target_image.size()
        in_label = torch.zeros([b]).cuda()       # 0 = normal (real) sample
        in_label_fake = torch.ones([b]).cuda()   # 1 = anomalous/fake label
        # Critic input is the reconstruction residual.
        in_image = reconstructed_image - target_image

        # optimize G (adversarial term only after a warm-up epoch)
        if epoch >= 1:
            # G&D Loss
            with autocast():
                loss_D_fake, _ = D(in_image, in_label_fake, train=True)
            # advasarial inverse target
            loss1 = loss_D_fake * 0.2  # TODO: coef

            args.scaler.scale(loss + loss1).backward()
            if args.gradient_clip:
                args.scaler.unscale_(optimizer)
                torch.nn.utils.clip_grad_norm_(model.parameters(),
                                               args.gradient_clip)
            args.scaler.step(optimizer)
            args.scaler.update()
            avg_loss.update(loss.mean().item() + loss1.item(), batch_size_now)
        else:
            args.scaler.scale(loss).backward()
            if args.gradient_clip:
                args.scaler.unscale_(optimizer)
                torch.nn.utils.clip_grad_norm_(model.parameters(),
                                               args.gradient_clip)
            args.scaler.step(optimizer)
            args.scaler.update()
            avg_loss.update(loss.mean().item(), batch_size_now)

        # optimize D
        optimizer_D.zero_grad()
        # label Threshold: decays every 15 epochs; samples whose pixel loss
        # exceeds it are pseudo-labeled anomalous for the critic.
        t = 7e-3 - 2e-3 * (epoch // 15)
        in_label[torch.where(loss_bak > t)] = 1  # TODO: T adjust
        if counter % 100 == 0:
            print("ANOMALY RATIO: ", sum(in_label) / in_label.size()[0])
        # Detach so critic gradients do not flow into the generator.
        in_image = in_image.detach()
        with autocast():
            loss_D, logit = D(in_image, in_label, train=True)

        args.scaler_D.scale(loss_D).backward()
        args.scaler_D.step(optimizer_D)
        args.scaler_D.update()
        avg_loss_D.update(loss_D.item(), batch_size_now)
        # ACC: binarize critic logits at 0.5 and compare to pseudo-labels.
        logit[logit > 0.5] = 1
        logit[logit <= 0.5] = 0
        acc = torch.true_divide(sum(torch.eq(logit - in_label, 0)), len(logit))
        if counter % 100 == 0:
            print("ACC is: ", acc)

        batch_time.update(time.time() - end)
        end = time.time()
        if args.rank % args.ngpus_per_node == 0:
            if counter % args.print_freq == 0:
                progress.print(counter)

        if args.visualize:
            _ = visualize(reconstructed_image, target_image)

        if args.object_detection:
            images, labels, bboxes = prefetcher.next()
        else:
            images, labels = prefetcher.next()
            bboxes = None

    print("Training sample number of epoch {} is: {}".format(
        epoch, counter * int(args.batch_size)))
    return avg_loss.avg
Example #6
0
def train(train_batch, model, optimizer, epoch, args):
    """Train the reconstruction model (optionally with a classifier head).

    One epoch with data prefetching and AMP via ``args.scaler``. When
    ``args.object_detection`` is set, frames are split into object patches
    before being fed to the model; otherwise whole frames are used.

    Args:
        train_batch: DataLoader of ``(images, labels[, bboxes])``.
        model: ``forward(..., train=True)`` returns
            ``(reconstructed_image, loss_dict)``, or with a 'Classifier'
            architecture ``(reconstructed_image, loss_dict, _)``.
        optimizer: optimizer for ``model``.
        epoch: current epoch index (logging only).
        args: namespace; uses ``object_detection``, ``label``, ``c``,
            ``arch``, ``visualize_input``, ``gradient_clip``, ``scaler``,
            ``visualize``, ``rank``, ``ngpus_per_node``, ``print_freq``,
            ``batch_size``.

    Returns:
        Average training loss for the epoch.
    """
    batch_time = metric.AverageMeter('Time', ':6.3f')
    data_time = metric.AverageMeter('Data', ':6.3f')
    avg_loss = metric.AverageMeter('avg_loss', ':.4e')
    # show all
    progress = metric.ProgressMeter(len(train_batch),
                                    batch_time,
                                    data_time,
                                    avg_loss,
                                    prefix="Epoch: [{}]".format(epoch))

    model.train()
    end = time.time()

    # Choose prefetcher variant depending on whether bboxes are provided.
    if args.object_detection:
        prefetcher = prefetch.data_prefetcher_trible(train_batch)
        images, labels, bboxes = prefetcher.next()
    else:
        prefetcher = prefetch.data_prefetcher(train_batch)
        images, labels = prefetcher.next()
        bboxes = None

    optimizer.zero_grad()
    counter = -1
    while images is not None:
        data_time.update(time.time() - end)
        counter += 1

        # whether split into object
        if args.object_detection:
            # 5 - 10 ms
            patches, labels, bbox_num = get_object_images(
                images, labels, bboxes, args)  # [K,C,H,W] [K] [B]
            del images
            batch_size_now = len(bbox_num)
        else:
            patches = images
            batch_size_now = images.size()[0]

        if patches is None:  # prevent no input
            if args.object_detection:
                images, labels, bboxes = prefetcher.next()
            else:
                images, labels = prefetcher.next()
                bboxes = None

            continue

        label = labels if args.label else None
        #assert label.sum() == 0, "training label must equal to zero"

        channel = (patches.size()[1] // args.c - 1) * args.c
        # input_image = patches[:, 0:channel]
        # target_image = patches[:, channel:]
        # The full patch stack is used as both input and target here
        # (the channel split above is intentionally disabled).
        input_image = patches.detach()
        target_image = patches.detach()

        if args.visualize_input:
            _ = visualize_single(target_image)

        optimizer.zero_grad()

        with autocast():
            if 'Classifier' in args.arch:
                # Two-stage variant: pixel loss plus classifier loss.
                reconstructed_image, loss, _ = model.forward(input_image,
                                                             gt=target_image,
                                                             label=label,
                                                             train=True)
                loss = loss['pixel_loss'].mean(
                ) + loss['classifier_loss'].mean()
            else:
                reconstructed_image, loss = model.forward(input_image,
                                                          gt=target_image,
                                                          label=label,
                                                          train=True)
                loss = loss['pixel_loss'].mean()

        args.scaler.scale(loss).backward()
        if args.gradient_clip:
            # Unscale before clipping so the norm is computed on true grads.
            args.scaler.unscale_(optimizer)
            torch.nn.utils.clip_grad_norm_(model.parameters(),
                                           args.gradient_clip)
        args.scaler.step(optimizer)
        args.scaler.update()
        avg_loss.update(loss.mean().item(), batch_size_now)

        batch_time.update(time.time() - end)
        end = time.time()
        if args.rank % args.ngpus_per_node == 0:
            if counter % args.print_freq == 0:
                progress.print(counter)

        if args.visualize:
            _ = visualize(reconstructed_image, target_image)

        if args.object_detection:
            images, labels, bboxes = prefetcher.next()
        else:
            images, labels = prefetcher.next()
            bboxes = None

    print("Training sample number of epoch {} is: {}".format(
        epoch, counter * int(args.batch_size)))
    return avg_loss.avg
Example #7
0
def multigpu_test_2gpus(args):
    """Benchmark parallel inference of a SplitNet's sub-models on two GPUs.

    Loads a checkpoint (optionally), then either runs each sub-model on its
    own GPU (``args.is_test_on_multigpus``) and ensembles their outputs
    (geometric mean when the module-level ``_GEO_TEST`` flag is set,
    arithmetic mean otherwise), or runs the whole model on GPU 0. Prints
    top-1/top-5 accuracy and per-image latency, then exits the process.

    Args:
        args: namespace; uses ``resume``, ``data``, ``eval_batch_size``,
            ``crop_size``, ``dataset``, ``workers``, ``loop_factor``,
            ``norm_mode``, ``is_test_on_multigpus``, ``split_factor``,
            ``is_amp``, ``arch``.

    Note:
        Calls ``sys.exit(0)`` at the end — it never returns.
    """
    model = splitnet.SplitNet(args,
                              norm_layer=norm.norm(args.norm_mode),
                              criterion=None)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("INFO:PyTorch: => loading checkpoint '{}'".format(
                args.resume))
            checkpoint = torch.load(args.resume)
            old_dict = checkpoint['state_dict']
            # orignial ckpt was save as nn.parallel.DistributedDataParallel() object
            old_dict = {
                k.replace("module.models", "models"): v
                for k, v in old_dict.items()
            }
            model.load_state_dict(old_dict)
            print("INFO:PyTorch: => loaded checkpoint"
                  " '{}' (epoch {})".format(args.resume, checkpoint['epoch']))
        else:
            print("INFO:PyTorch: => no checkpoint found at '{}'".format(
                args.resume))

    # accelarate the training
    torch.backends.cudnn.benchmark = True

    val_loader = factory.get_data_loader(args.data,
                                         batch_size=args.eval_batch_size,
                                         crop_size=args.crop_size,
                                         dataset=args.dataset,
                                         split="val",
                                         num_workers=args.workers)
    # record the top1 accuray of each small network
    top1_all = []
    for i in range(args.loop_factor):
        top1_all.append(metric.AverageMeter('{}_Acc@1'.format(i), ':6.2f'))
    avg_top1 = metric.AverageMeter('Avg_Acc@1', ':6.2f')
    # BUG FIX: this meter was mislabeled 'Avg_Acc@1'; it tracks top-5.
    avg_top5 = metric.AverageMeter('Avg_Acc@5', ':6.2f')
    progress = metric.ProgressMeter(len(val_loader),
                                    *top1_all,
                                    avg_top1,
                                    avg_top5,
                                    prefix='Test: ')

    # switch to evaluate mode
    model.eval()
    # move model to the gpu(s): one sub-model per device when multi-GPU
    if args.is_test_on_multigpus:
        print("INFO:PyTorch: multi GPUs test")
        cuda_models = []
        for idx in range(args.split_factor):
            cuda_models.append(model.models[idx].cuda(idx))
    else:
        print("INFO:PyTorch: single GPU test")
        model = model.cuda(0)

    with torch.no_grad():
        # record time and number of samples
        prefetcher = data_prefetcher_2gpus(val_loader, ngpus=args.split_factor)
        images_gpu0, target, images_gpu1 = prefetcher.next()
        i = 0
        n_count = 0.0
        start_time = time.time()

        while images_gpu0 is not None:
            i += 1
            # for i, (images, target) in enumerate(val_loader):
            # compute outputs and losses
            if args.is_test_on_multigpus:
                if args.is_amp:
                    with amp.autocast():
                        output_gpu0 = cuda_models[0](images_gpu0)
                    with amp.autocast():
                        output_gpu1 = cuda_models[1](images_gpu1)
                else:
                    output_gpu0 = cuda_models[0](images_gpu0)
                    output_gpu1 = cuda_models[1](images_gpu1)

                if _GEO_TEST:
                    if i == 1:
                        print("using geometry mean")
                    # Geometric mean of softmax distributions.
                    output_gpu0 = F.softmax(output_gpu0, dim=-1)
                    output_gpu1 = F.softmax(output_gpu1, dim=-1)
                    ensemble_output = torch.sqrt(output_gpu0 *
                                                 output_gpu1.cuda(0))
                else:
                    # Arithmetic mean of raw logits (gathered on GPU 0).
                    outputs = torch.stack([output_gpu0, output_gpu1.cuda(0)])
                    ensemble_output = torch.mean(outputs, dim=0)

            else:
                # compute outputs and losses
                if args.is_amp:
                    with amp.autocast():
                        ensemble_output, outputs, ce_loss = model(
                            images_gpu0, target=target, mode='val')
                else:
                    ensemble_output, outputs, ce_loss = model(images_gpu0,
                                                              target=target,
                                                              mode='val')

            # measure accuracy and record loss
            """
			target = target.cpu()
			ensemble_output = ensemble_output.cpu().float()
			outputs = outputs.cpu().float()
			"""

            batch_size_now = images_gpu0.size(0)
            """
			for j in range(args.loop_factor):
				acc1, acc5 = metric.accuracy(outputs[j, ...], target, topk=(1, 5))
				top1_all[j].update(acc1[0].item(), batch_size_now)
			"""
            # simply average outputs of small networks
            avg_acc1, avg_acc5 = metric.accuracy(ensemble_output,
                                                 target,
                                                 topk=(1, 5))
            avg_top1.update(avg_acc1[0].item(), batch_size_now)
            avg_top5.update(avg_acc5[0].item(), batch_size_now)

            images_gpu0, target, images_gpu1 = prefetcher.next()

            n_count += batch_size_now
            """
			if i % args.print_freq == 0:
				progress.print(i)
			"""
        time_cnt = time.time() - start_time
        # print accuracy info
        acc_all = []
        acc_all.append(avg_top1.avg)
        acc_all.append(avg_top5.avg)
        acc_info = '* Acc@1 {:.3f} Acc@5 {:.3f}'.format(acc_all[0], acc_all[1])
        """
		mean_acc = 0.0
		for j in range(args.loop_factor):
			acc_all.append(top1_all[j].avg)
			acc_info += '\t {}_acc@1 {:.3f}'.format(j, top1_all[j].avg)
			mean_acc += top1_all[j].avg
		acc_info += "\t avg_acc {:.3f}".format(mean_acc / args.split_factor)
		"""
        print(acc_info)

    print("multiple GPUs ({})".format(args.is_test_on_multigpus))
    print("The tested architecture is {} with split_factor {}".format(
        args.arch, args.split_factor))
    print("The number of the samples is {}".format(n_count))
    print("The total testing time is {} second".format(time_cnt))
    print("The average test time is {}ms per images".format(1000 * time_cnt /
                                                            n_count))

    torch.cuda.empty_cache()
    sys.exit(0)
Example #8
0
def multistreams_test(args):
    """Validate running several SplitNet sub-models in parallel on ONE GPU,
	each on its own CUDA stream.

	Optionally restores a checkpoint, evaluates on the validation split,
	averages the sub-model logits into an ensemble prediction, and reports
	per-model top-1 plus ensemble top-1/top-5 accuracy and timing.
	Terminates the process via ``sys.exit(0)`` when done.

	Args:
		args: parsed command-line namespace; the fields read here are
			resume, norm_mode, data, eval_batch_size, crop_size, dataset,
			workers, loop_factor, split_factor, is_amp and print_freq.
	"""
    model = splitnet.SplitNet(args,
                              norm_layer=norm.norm(args.norm_mode),
                              criterion=None)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("INFO:PyTorch: => loading checkpoint '{}'".format(
                args.resume))
            checkpoint = torch.load(args.resume)
            old_dict = checkpoint['state_dict']
            # original ckpt was saved as a nn.parallel.DistributedDataParallel()
            # object, so strip the "module." prefix from the sub-model keys
            old_dict = {
                k.replace("module.models", "models"): v
                for k, v in old_dict.items()
            }

            model.load_state_dict(old_dict)
            print("INFO:PyTorch: => loaded checkpoint"
                  " '{}' (epoch {})".format(args.resume, checkpoint['epoch']))
        else:
            print("INFO:PyTorch: => no checkpoint found at '{}'".format(
                args.resume))

    # let cudnn autotune convolution algorithms for the fixed input size
    torch.backends.cudnn.benchmark = True

    val_loader = factory.get_data_loader(args.data,
                                         batch_size=args.eval_batch_size,
                                         crop_size=args.crop_size,
                                         dataset=args.dataset,
                                         split="val",
                                         num_workers=args.workers)
    # record the top-1 accuracy of each small network
    top1_all = []
    for i in range(args.loop_factor):
        top1_all.append(metric.AverageMeter('{}_Acc@1'.format(i), ':6.2f'))
    avg_top1 = metric.AverageMeter('Avg_Acc@1', ':6.2f')
    # BUGFIX: label was a copy-pasted 'Avg_Acc@1'; this meter tracks top-5
    avg_top5 = metric.AverageMeter('Avg_Acc@5', ':6.2f')
    progress = metric.ProgressMeter(len(val_loader),
                                    *top1_all,
                                    avg_top1,
                                    avg_top5,
                                    prefix='Test: ')

    # switch to evaluate mode
    model.eval()
    # move every sub-model to gpu 0 and give each its own CUDA stream
    cuda_models = []
    cuda_streams = []
    for idx in range(args.split_factor):
        cuda_streams.append(torch.cuda.Stream())
        cuda_models.append(model.models[idx].cuda(0))
    torch.cuda.synchronize()

    # record time and number of samples
    n_count = 0.0
    start_time = time.time()

    with torch.no_grad():
        for i, (images, target) in enumerate(val_loader):
            images = images.cuda(0, non_blocking=True)
            target = target.cuda(0, non_blocking=True)
            collect_outputs = []

            # Launch one forward pass per sub-model, each on its own stream.
            # BUGFIX: the amp path used to hard-code exactly two models into
            # output_0/output_1 and then extend collect_outputs with them
            # UNCONDITIONALLY, which raised a NameError on the non-amp path
            # and broke any split_factor != 2. Both paths now share one loop
            # over args.split_factor.
            for idx in range(args.split_factor):
                with torch.cuda.stream(cuda_streams[idx]):
                    if args.is_amp:
                        with amp.autocast():
                            collect_outputs.append(cuda_models[idx](images))
                    else:
                        collect_outputs.append(cuda_models[idx](images))
            # wait until every stream has produced its logits
            torch.cuda.synchronize()

            # outputs may be fp16 under amp; mean over the model axis
            outputs = torch.stack(collect_outputs, dim=0)
            ensemble_output = torch.mean(outputs, dim=0)

            # measure accuracy and record loss
            batch_size_now = images.size(0)
            n_count += batch_size_now
            for j in range(args.loop_factor):
                acc1, acc5 = metric.accuracy(outputs[j, ...],
                                             target,
                                             topk=(1, 5))
                top1_all[j].update(acc1[0].item(), batch_size_now)

            # simply average outputs of small networks
            avg_acc1, avg_acc5 = metric.accuracy(ensemble_output,
                                                 target,
                                                 topk=(1, 5))
            avg_top1.update(avg_acc1[0].item(), batch_size_now)
            avg_top5.update(avg_acc5[0].item(), batch_size_now)

            if i % args.print_freq == 0:
                progress.print(i)

        time_cnt = time.time() - start_time

        # print accuracy info
        acc_all = []
        acc_all.append(avg_top1.avg)
        acc_all.append(avg_top5.avg)
        acc_info = '* Acc@1 {:.3f} Acc@5 {:.3f}'.format(acc_all[0], acc_all[1])
        mean_acc = 0.0
        for j in range(args.loop_factor):
            acc_all.append(top1_all[j].avg)
            acc_info += '\t {}_acc@1 {:.3f}'.format(j, top1_all[j].avg)
            mean_acc += top1_all[j].avg
        acc_info += "\t avg_acc {:.3f}".format(mean_acc / args.split_factor)
        print(acc_info)

    print("The tested architecture is {} with split_factor {}".format(
        args.arch, args.split_factor))
    print("The number of the samples is {}".format(n_count))
    print("The total testing time is {} second".format(time_cnt))
    print("The average test time is {}ms per images".format(1000 * time_cnt /
                                                            n_count))

    torch.cuda.empty_cache()
    sys.exit(0)
# ---- 예제 #9 (Example #9) ----
# (score: 0)
def multigpu_test(args):
    """Validate running several SplitNet sub-models in parallel, one sub-model
	per GPU.

	Optionally restores a checkpoint, evaluates on the validation split, and
	combines the per-GPU logits into an ensemble prediction — either an
	arithmetic mean, or a geometric mean of softmax probabilities when the
	module-level ``_GEO_TEST`` flag is set. Reports ensemble top-1/top-5
	accuracy and timing, then terminates the process via ``sys.exit(0)``.

	Args:
		args: parsed command-line namespace; the fields read here are
			resume, norm_mode, data, eval_batch_size, crop_size, dataset,
			workers, loop_factor, split_factor, is_amp and
			is_test_on_multigpus.
	"""
    model = splitnet.SplitNet(args,
                              norm_layer=norm.norm(args.norm_mode),
                              criterion=None)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("INFO:PyTorch: => loading checkpoint '{}'".format(
                args.resume))
            checkpoint = torch.load(args.resume)
            old_dict = checkpoint['state_dict']
            # original ckpt was saved as a nn.parallel.DistributedDataParallel()
            # object, so strip the "module." prefix from the sub-model keys
            old_dict = {
                k.replace("module.models", "models"): v
                for k, v in old_dict.items()
            }
            model.load_state_dict(old_dict)
            print("INFO:PyTorch: => loaded checkpoint"
                  " '{}' (epoch {})".format(args.resume, checkpoint['epoch']))
        else:
            print("INFO:PyTorch: => no checkpoint found at '{}'".format(
                args.resume))

    # let cudnn autotune convolution algorithms for the fixed input size
    torch.backends.cudnn.benchmark = True

    val_loader = factory.get_data_loader(args.data,
                                         batch_size=args.eval_batch_size,
                                         crop_size=args.crop_size,
                                         dataset=args.dataset,
                                         split="val",
                                         num_workers=args.workers)
    # record the top-1 accuracy of each small network
    top1_all = []
    for i in range(args.loop_factor):
        top1_all.append(metric.AverageMeter('{}_Acc@1'.format(i), ':6.2f'))
    avg_top1 = metric.AverageMeter('Avg_Acc@1', ':6.2f')
    # BUGFIX: label was a copy-pasted 'Avg_Acc@1'; this meter tracks top-5
    avg_top5 = metric.AverageMeter('Avg_Acc@5', ':6.2f')
    progress = metric.ProgressMeter(len(val_loader),
                                    *top1_all,
                                    avg_top1,
                                    avg_top5,
                                    prefix='Test: ')

    # switch to evaluate mode
    model.eval()
    n_count = 0.0

    # place sub-model idx on gpu idx (one model per device)
    cuda_models = []
    for idx in range(args.split_factor):
        cuda_models.append(model.models[idx].cuda(idx))
    start_time = time.time()

    # BUGFIX: inference previously ran without no_grad(), needlessly building
    # autograd graphs (consistent now with multistreams_test)
    with torch.no_grad():
        for i, (images, target) in enumerate(val_loader):
            cuda_images = []
            cuda_outputs = []
            target = target.cuda(0, non_blocking=True)
            # broadcast the batch to every device
            for idx in range(args.split_factor):
                cuda_images.append(images.cuda(idx, non_blocking=True))

            if args.is_amp:
                with amp.autocast():
                    for idx in range(args.split_factor):
                        cuda_outputs.append(cuda_models[idx](cuda_images[idx]))
            else:
                for idx in range(args.split_factor):
                    cuda_outputs.append(cuda_models[idx](cuda_images[idx]))

            if _GEO_TEST:
                if i == 1:
                    print("using geometry mean")
                # geometric mean of the softmax probabilities, gathered on
                # gpu 0 (the host gpu).
                # BUGFIX: this used torch.sqrt(cmul), which is only correct
                # for split_factor == 2; the general 1/split_factor root
                # keeps the same result for 2 and fixes other factors.
                cmul = 1.0
                for j in range(args.split_factor):
                    cmul = cmul * F.softmax(cuda_outputs[j].cuda(0), dim=-1)
                ensemble_output = torch.pow(cmul, 1.0 / args.split_factor)
            else:
                # arithmetic mean of raw logits on the host gpu
                collect_outputs = [
                    cuda_outputs[idx].cuda(0)
                    for idx in range(args.split_factor)
                ]
                outputs = torch.stack(collect_outputs, dim=0)
                ensemble_output = torch.mean(outputs, dim=0)

            batch_size_now = images.size(0)
            # simply average outputs of small networks
            avg_acc1, avg_acc5 = metric.accuracy(ensemble_output,
                                                 target,
                                                 topk=(1, 5))
            avg_top1.update(avg_acc1[0].item(), batch_size_now)
            avg_top5.update(avg_acc5[0].item(), batch_size_now)

            n_count += batch_size_now

    time_cnt = time.time() - start_time
    # print accuracy info
    acc_all = []
    acc_all.append(avg_top1.avg)
    acc_all.append(avg_top5.avg)
    acc_info = '* Acc@1 {:.3f} Acc@5 {:.3f}'.format(acc_all[0], acc_all[1])
    print(acc_info)

    print("multiple GPUs ({})".format(args.is_test_on_multigpus))
    print("The tested architecture is {} with split_factor {}".format(
        args.arch, args.split_factor))
    print("The number of the samples is {}".format(n_count))
    print("The total testing time is {} second".format(time_cnt))
    print("The average test time is {}ms per images".format(1000 * time_cnt /
                                                            n_count))

    torch.cuda.empty_cache()
    sys.exit(0)