Example No. 1
    def __init__(self, args):
        self.args = args
        self.iteration = 0

        # setup data set and data loader
        self.dataloader = create_loader(args)

        # set up losses and metrics
        self.rec_loss_func = {
            key: getattr(loss_module, key)()
            for key, val in args.rec_loss.items()
        }
        self.adv_loss = getattr(loss_module, args.gan_type)()

        # Image generator input: [rgb(3) + mask(1)], discriminator input: [rgb(3)]
        net = importlib.import_module('model.' + args.model)

        self.netG = net.InpaintGenerator(args).cuda()
        self.optimG = torch.optim.Adam(self.netG.parameters(),
                                       lr=args.lrg,
                                       betas=(args.beta1, args.beta2))

        self.netD = net.Discriminator().cuda()
        self.optimD = torch.optim.Adam(self.netD.parameters(),
                                       lr=args.lrd,
                                       betas=(args.beta1, args.beta2))

        self.load()
        if args.distributed:
            self.netG = DDP(self.netG,
                            device_ids=[args.local_rank],
                            output_device=args.local_rank)
            self.netD = DDP(self.netD,
                            device_ids=[args.local_rank],
                            output_device=args.local_rank)
Example No. 2
def main():
    args, args_text = _parse_args()

    args.distributed = False
    if 'WORLD_SIZE' in os.environ:
        args.distributed = int(os.environ['WORLD_SIZE']) > 1
    args.device = 'cuda:0'
    args.world_size = 1
    args.rank = 0  # global rank
    if args.distributed:
        args.device = 'cuda:%d' % args.local_rank
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend='nccl',
                                             init_method='env://')
        args.world_size = torch.distributed.get_world_size()
        args.rank = torch.distributed.get_rank()
    model_name = 'efficientdet_d0'
    data_config = get_efficientdet_config(model_name)

    train_anno_set = 'train2017'
    train_annotation_path = os.path.join(args.data, 'annotations',
                                         f'instances_{train_anno_set}.json')
    train_image_dir = train_anno_set
    dataset_train = CocoDetection(os.path.join(args.data, train_image_dir),
                                  train_annotation_path, data_config)
    print("Length of training dataset {}".format(len(dataset_train)))
    loader_train = create_loader(
        dataset_train,
        input_size=args.input_size,
        batch_size=args.batch_size,
        is_training=True,
        use_prefetcher=args.prefetcher,
        #re_prob=args.reprob,  # FIXME add back various augmentations
        #re_mode=args.remode,
        #re_count=args.recount,
        #re_split=args.resplit,
        #color_jitter=args.color_jitter,
        #auto_augment=args.aa,
        interpolation=args.train_interpolation,
        #mean=data_config['mean'],
        #std=data_config['std'],
        num_workers=args.workers,
        distributed=args.distributed,
        #collate_fn=collate_fn,
        pin_mem=args.pin_mem,
    )

    print("Iterations per epoch {}".format(
        math.ceil(len(dataset_train) / (args.batch_size * args.world_size))))
    data_time_m = AverageMeter()
    end = time.time()
    if args.local_rank == 0:
        print("Starting to test...")
    for batch_idx, (input, target) in enumerate(loader_train):
        data_time_m.update(time.time() - end)
        if args.local_rank == 0 and batch_idx % 20 == 0:
            print("batch time till {} is {}".format(batch_idx,
                                                    data_time_m.avg))
        end = time.time()
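
Note: AverageMeter is used here and in several later examples but never defined in these snippets. A minimal sketch of such a running-average meter, assuming the conventional val/avg/sum/count interface of the timm utility:

class AverageMeter:
    """Tracks the latest value and a running average (assumed interface)."""
    def __init__(self):
        self.val = 0.0
        self.avg = 0.0
        self.sum = 0.0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count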
Example No. 3
def valid(valid_ds):
    model.eval()
    valid_loss = 0.
    valid_loader = create_loader(args, valid_ds)
    for (x, x_len, y) in valid_loader:
        with torch.no_grad():
            loss = criterion(model(x, x_len), y)
        valid_loss += loss.item()
    return valid_loss / len(valid_loader)
Example No. 4
def main():
    args = parser.parse_args()
    args.gpu_id = 0

    # Set graph optimization level
    sess_options = onnxruntime.SessionOptions()
    sess_options.graph_optimization_level = onnxruntime.GraphOptimizationLevel.ORT_ENABLE_ALL
    if args.profile:
        sess_options.enable_profiling = True
    if args.onnx_output_opt:
        sess_options.optimized_model_filepath = args.onnx_output_opt

    session = onnxruntime.InferenceSession(args.onnx_input, sess_options)

    data_config = resolve_data_config(None, args)
    loader = create_loader(
        Dataset(args.data, load_bytes=args.tf_preprocessing),
        input_size=data_config['input_size'],
        batch_size=args.batch_size,
        use_prefetcher=False,
        interpolation=data_config['interpolation'],
        mean=data_config['mean'],
        std=data_config['std'],
        num_workers=args.workers,
        crop_pct=data_config['crop_pct'],
        tensorflow_preprocessing=args.tf_preprocessing)

    input_name = session.get_inputs()[0].name

    batch_time = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    end = time.time()
    for i, (input, target) in enumerate(loader):
        # run the net and return prediction
        output = session.run([], {input_name: input.data.numpy()})
        output = output[0]

        # measure accuracy and record loss
        prec1, prec5 = accuracy_np(output, target.numpy())
        top1.update(prec1.item(), input.size(0))
        top5.update(prec5.item(), input.size(0))

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            print('Test: [{0}/{1}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f}, {rate_avg:.3f}/s, {ms_avg:.3f} ms/sample) \t'
                  'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                  'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                i, len(loader), batch_time=batch_time, rate_avg=input.size(0) / batch_time.avg,
                ms_avg=1000 * batch_time.avg / input.size(0), top1=top1, top5=top5))

    print(' * Prec@1 {top1.avg:.3f} ({top1a:.3f}) Prec@5 {top5.avg:.3f} ({top5a:.3f})'.format(
        top1=top1, top1a=100-top1.avg, top5=top5, top5a=100.-top5.avg))
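
Note: accuracy_np is referenced but not shown. One plausible NumPy top-1/top-5 accuracy helper (a sketch under that assumption, not necessarily the exact implementation used here):

import numpy as np

def accuracy_np(output, target):
    # output: [N, num_classes] scores, target: [N] integer class labels
    order = np.argsort(output, axis=1)[:, ::-1]
    top1 = 100.0 * np.equal(order[:, 0], target).mean()
    top5 = 100.0 * np.equal(order[:, :5], target[:, np.newaxis]).any(axis=1).mean()
    return top1, top5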
Example No. 5
def valid(args):
    model.eval()
    valid_loader = create_loader(args, valid_ds)
    valid_loss = 0.
    for (x, y, sl) in valid_loader:
        with torch.no_grad():
            logits = model(x, sl)
            loss = criterion(logits, y)
        valid_loss += loss.item()
    return valid_loss / len(valid_loader)
Example No. 6
def test(test_ds):
    model.eval()
    test_loader = create_loader(args, test_ds, shuffle=False)
    y_true, y_pred = [], []
    for (x, x_len, y) in test_loader:
        with torch.no_grad():
            preds = model(x, x_len).argmax(1)
        for i in range(x.size(0)):
            y_true.append(y[i].item())
            y_pred.append(preds[i].item())
    return classification_report(y_true, y_pred, digits=6)
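
Note: the per-sample inner loop can be collapsed into batch-level list extension; an equivalent sketch, assuming y and preds are 1-D tensors:

# inside the loop body, instead of appending item by item:
y_true.extend(y.tolist())
y_pred.extend(preds.tolist())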
Example No. 7
def test(args):
    model.load_state_dict(torch.load(args.ckpt))
    model.eval()
    test_loader = create_loader(args, test_ds, shuffle=False)
    y_true, y_pred = [], []
    for (x, y, sl) in test_loader:
        with torch.no_grad():
            logits = model(x, sl)  # [B, C]
            preds = logits.argmax(dim=1)
        for i in range(len(x)):
            y_true.append(y[i].item())
            y_pred.append(preds[i].item())
    y_true = np.array(y_true)
    y_pred = np.array(y_pred)
    return classification_report(y_true, y_pred, digits=4)
Example No. 8
def train(args):
    model.train()
    train_loader = create_loader(args, train_ds)
    train_loss = 0.
    global_steps = 0
    optimizer.zero_grad()
    for i, (x, y, sl) in enumerate(train_loader, 1):
        loss = criterion(model(x, sl), y)
        if args.grad_step != -1:
            loss = loss / args.grad_step
        train_loss += loss.item()
        loss.backward()
        if _grad_step(args, i):
            optimizer.step()
            optimizer.zero_grad()
            global_steps += 1
    return train_loss / global_steps
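
Note: _grad_step is not defined in this snippet. A plausible sketch of the gradient-accumulation gate, assuming args.grad_step is the accumulation interval and -1 disables accumulation:

def _grad_step(args, step):
    # Step the optimizer every args.grad_step mini-batches; -1 means step every batch.
    if args.grad_step == -1:
        return True
    return step % args.grad_step == 0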
Example No. 9
def train(train_ds):
    model.train()
    train_loss = 0.
    global_steps = 0.
    train_loader = create_loader(args, train_ds)
    optimizer.zero_grad()
    for i, (x, x_len, y) in enumerate(train_loader, 1):
        loss = criterion(model(x, x_len), y)
        if args.accumulate_grad > 1:
            loss = loss / args.accumulate_grad
        train_loss += loss.item()
        loss.backward()
        if optimizer_step(args, i):
            optimizer.step()
            optimizer.zero_grad()
            global_steps += 1
    return train_loss / global_steps
Example No. 10
def validate(args):
    setup_default_logging()

    def setthresh():
        ckpt_key = args.checkpoint.split("/")[-1].split("_")[0]
        if ckpt_key in getthresholds:
            return getthresholds[ckpt_key]
        return [args.threshold] * 4

    threshs = setthresh()
    print(threshs)
    # might as well try to validate something
    args.pretrained = args.pretrained or not args.checkpoint
    args.prefetcher = not args.no_prefetcher

    # create model
    bench = create_model(
        args.model,
        bench_task='predict',
        pretrained=args.pretrained,
        redundant_bias=args.redundant_bias,
        checkpoint_path=args.checkpoint,
        checkpoint_ema=args.use_ema,
    )
    input_size = bench.config.image_size

    param_count = sum([m.numel() for m in bench.parameters()])
    print('Model %s created, param count: %d' % (args.model, param_count))

    bench = bench.cuda()
    if has_amp:
        print('Using AMP mixed precision.')
        bench = amp.initialize(bench, opt_level='O1')
    else:
        print('AMP not installed, running network in FP32.')

    if args.num_gpu > 1:
        bench = torch.nn.DataParallel(bench,
                                      device_ids=list(range(args.num_gpu)))

    if 'test' in args.anno:
        annotation_path = os.path.join(args.data, 'annotations',
                                       f'image_info_{args.anno}.json')
        image_dir = args.anno
    else:
        annotation_path = os.path.join(args.data, 'annotations',
                                       f'instances_{args.anno}.json')
        image_dir = args.anno
    print(os.path.join(args.data, image_dir), annotation_path)
    dataset = CocoDetection(os.path.join(args.data, image_dir),
                            annotation_path)

    loader = create_loader(dataset,
                           input_size=input_size,
                           batch_size=args.batch_size,
                           use_prefetcher=args.prefetcher,
                           interpolation=args.interpolation,
                           fill_color=args.fill_color,
                           num_workers=args.workers,
                           pin_mem=args.pin_mem,
                           mean=args.mean,
                           std=args.std)
    if 'test' in args.anno:
        threshold = float(args.threshold)
    else:
        threshold = .001
    img_ids = []
    results = []
    writetofilearrtay = []
    bench.eval()
    batch_time = AverageMeter()
    end = time.time()
    with torch.no_grad():
        for i, (input, target) in enumerate(loader):
            output = bench(input, target['img_scale'], target['img_size'])
            output = output.cpu()
            # print(target['img_id'])
            sample_ids = target['img_id'].cpu()

            for index, sample in enumerate(output):
                image_id = int(sample_ids[index])

                for det in sample:
                    score = float(det[4])
                    if score < threshold:  # stop when below this threshold, scores in descending order
                        coco_det = dict(image_id=image_id, category_id=-1)
                        img_ids.append(image_id)
                        results.append(coco_det)
                        break
                    coco_det = dict(image_id=image_id,
                                    bbox=det[0:4].tolist(),
                                    score=score,
                                    category_id=int(det[5]),
                                    sizes=target['img_size'].tolist()[0])
                    img_ids.append(image_id)
                    results.append(coco_det)

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % args.log_freq == 0:
                print(
                    'Test: [{0:>4d}/{1}]  '
                    'Time: {batch_time.val:.3f}s ({batch_time.avg:.3f}s, {rate_avg:>7.2f}/s)  '
                    .format(
                        i,
                        len(loader),
                        batch_time=batch_time,
                        rate_avg=input.size(0) / batch_time.avg,
                    ))

    if 'test' in args.anno:
        from itertools import groupby
        results.sort(key=lambda x: x['image_id'])

        f = open(
            str(args.model) + "-" + str(args.anno) + "-" + str(min(threshs)) +
            ".txt", "w+")
        # for item in tqdm(writetofilearrtay):
        xxx = 0
        for k, v in tqdm(groupby(results, key=lambda x: x['image_id'])):
            xxx += 1
            f.write(getimageNamefromid(k) +
                    ",")  #print(getimageNamefromid(k),", ")
            for i in v:
                if i['category_id'] > 0:
                    cid = i['category_id']
                    if 1 <= cid <= 4 and i['score'] >= threshs[cid - 1]:
                        f.write(
                            str(round(i['category_id'])) + " " +
                            str(round(i['bbox'][0])) + " " +
                            str(round(i['bbox'][1])) + " " + str(
                                round(
                                    float(i['bbox'][0]) +
                                    float(i['bbox'][2]))) + " " + str(
                                        round(
                                            float(i['bbox'][1]) +
                                            float(i['bbox'][3]))) + " ")
            f.write('\n')
            # print(i['category_id']," ",i['bbox'][0]," ",i['bbox'][1]," ",i['bbox'][2]," ",i['bbox'][3]," ")
        print("generated lines:", xxx)
        f.close()

    #   f.close()
    if 'test' not in args.anno:
        array_of_dm = []
        array_of_gt = []

        i = 0
        # if 'test' in args.anno :

        for _, item in tqdm(dataset):
            # if item["img_id"] == "1000780" :
            # print(item)
            for i in range(len(item['cls'])):
                # print(str(item["img_id"]),)
                array_of_gt.append(
                    BoundingBox(imageName=str(item["img_id"]),
                                classId=item["cls"][i],
                                x=item["bbox"][i][1] * item['img_scale'],
                                y=item["bbox"][i][0] * item['img_scale'],
                                w=item["bbox"][i][3] * item['img_scale'],
                                h=item["bbox"][i][2] * item['img_scale'],
                                typeCoordinates=CoordinatesType.Absolute,
                                bbType=BBType.GroundTruth,
                                format=BBFormat.XYX2Y2,
                                imgSize=(item['img_size'][0],
                                         item['img_size'][1])))

        for item in tqdm(results):
            if item["category_id"] >= 0:
                array_of_dm.append(
                    BoundingBox(imageName=str(item["image_id"]),
                                classId=item["category_id"],
                                classConfidence=item["score"],
                                x=item['bbox'][0],
                                y=item['bbox'][1],
                                w=item['bbox'][2],
                                h=item['bbox'][3],
                                typeCoordinates=CoordinatesType.Absolute,
                                bbType=BBType.Detected,
                                format=BBFormat.XYWH,
                                imgSize=(item['sizes'][0], item['sizes'][1])))
        myBoundingBoxes = BoundingBoxes()
        # # # # Add all bounding boxes to the BoundingBoxes object:
        for box in (array_of_gt):
            myBoundingBoxes.addBoundingBox(box)
        for dm in array_of_dm:
            myBoundingBoxes.addBoundingBox(dm)

        evaluator = Evaluator()
        f1res = []
        f1resd0 = []
        f1resd10 = []
        f1resd20 = []
        f1resd40 = []
        for conf in tqdm(range(210, 600, 1)):
            metricsPerClass = evaluator.GetPascalVOCMetrics(
                myBoundingBoxes, IOUThreshold=0.5, ConfThreshold=conf / 1000.0)

            totalTP = 0
            totalp = 0
            totalFP = 0
            tp = []
            fp = []
            ta = []
            # print('-------')
            for mc in metricsPerClass:
                tp.append(mc['total TP'])
                fp.append(mc['total FP'])
                ta.append(mc['total positives'])

                totalFP = totalFP + mc['total FP']
                totalTP = totalTP + mc['total TP']
                totalp = totalp + (mc['total positives'])

            # print(totalTP," ",totalFP," ",totalp)
            if totalTP + totalFP == 0:
                p = -1
            else:
                p = totalTP / (totalTP + totalFP)
            if totalp == 0:
                r = -1
            else:
                r = totalTP / (totalp)
            f1_dict = dict(tp=totalTP,
                           fp=totalFP,
                           totalp=totalp,
                           conf=conf / 1000.0,
                           prec=p,
                           rec=r,
                           f1score=(2 * p * r) / (p + r))
            f1res.append(f1_dict)
            #must clean these parts
            f1resd0.append(
                dict(tp=tp[0],
                     fp=fp[0],
                     totalp=ta[0],
                     conf=conf / 1000.0,
                     prec=tp[0] / (tp[0] + fp[0]),
                     rec=tp[0] / ta[0],
                     f1score=(2 * (tp[0] / (tp[0] + fp[0])) *
                              (tp[0] / ta[0])) / ((tp[0] / (tp[0] + fp[0])) +
                                                  (tp[0] / ta[0]))))

            f1resd10.append(
                dict(tp=tp[1],
                     fp=fp[1],
                     totalp=ta[1],
                     conf=conf / 1000.0,
                     prec=tp[1] / (tp[1] + fp[1]),
                     rec=tp[1] / ta[1],
                     f1score=(2 * (tp[1] / (tp[1] + fp[1])) *
                              (tp[1] / ta[1])) / ((tp[1] / (tp[1] + fp[1])) +
                                                  (tp[1] / ta[1]))))

            f1resd20.append(
                dict(tp=tp[2],
                     fp=fp[2],
                     totalp=ta[2],
                     conf=conf / 1000.0,
                     prec=tp[2] / (tp[2] + fp[2]),
                     rec=tp[2] / ta[2],
                     f1score=(2 * (tp[2] / (tp[2] + fp[2])) *
                              (tp[2] / ta[2])) / ((tp[2] / (tp[2] + fp[2])) +
                                                  (tp[2] / ta[2]))))

            f1resd40.append(
                dict(tp=tp[3],
                     fp=fp[3],
                     totalp=ta[3],
                     conf=conf / 1000.0,
                     prec=tp[3] / (tp[3] + fp[3]),
                     rec=tp[3] / ta[3],
                     f1score=(2 * (tp[3] / (tp[3] + fp[3])) *
                              (tp[3] / ta[3])) / ((tp[3] / (tp[3] + fp[3])) +
                                                  (tp[3] / ta[3]))))

        sortedf1 = sorted(f1res, key=lambda k: k['f1score'], reverse=True)

        f1resd0 = sorted(f1resd0, key=lambda k: k['f1score'], reverse=True)
        f1resd10 = sorted(f1resd10, key=lambda k: k['f1score'], reverse=True)
        f1resd20 = sorted(f1resd20, key=lambda k: k['f1score'], reverse=True)
        f1resd40 = sorted(f1resd40, key=lambda k: k['f1score'], reverse=True)

        print(sortedf1[0])
        print("\n\n")
        print(f1resd0[0])
        print(f1resd10[0])
        print(f1resd20[0])
        print(f1resd40[0])
        # sortedf1 = sorted(f1res, key=lambda k: k['f1score'],reverse=True)
        # print(sortedf1[0:2])
        # json.dump(results, open(args.results, 'w'), indent=4)
        json.dump(results, open(args.results, 'w'), indent=4)
        # coco_results = dataset.coco.loadRes(args.results)
        # coco_eval = COCOeval(dataset.coco, coco_results, 'bbox')
        # coco_eval.params.imgIds = img_ids  # score only ids we've used
        # coco_eval.evaluate()
        # coco_eval.accumulate()
        # coco_eval.summarize()
        # print(coco_eval.eval['params'])

    json.dump(results, open(args.results, 'w'), indent=4)

    return results
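
Note: the four nearly identical per-class F1 blocks above repeat the same arithmetic and divide by zero whenever a class has no detections. A hedged helper (hypothetical name, not part of the original script) that builds one such entry safely:

def class_f1_entry(tp, fp, totalp, conf):
    # Precision, recall and F1 for one class at one confidence threshold.
    prec = tp / (tp + fp) if (tp + fp) > 0 else 0.0
    rec = tp / totalp if totalp > 0 else 0.0
    f1 = (2 * prec * rec) / (prec + rec) if (prec + rec) > 0 else 0.0
    return dict(tp=tp, fp=fp, totalp=totalp, conf=conf, prec=prec, rec=rec, f1score=f1)

# e.g. f1resd0.append(class_f1_entry(tp[0], fp[0], ta[0], conf / 1000.0))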
Example No. 11
def main():
    
    # setting arguments
    parser = argparse.ArgumentParser(description='Test arguments')
    parser.add_argument('--opt', type=str, required=True, help='path to test yaml file')
    parser.add_argument('--name', type=str, required=True, help='test log file name')
    parser.add_argument('--dataset_name', type=str, default=None)
    parser.add_argument('--scale', type=int, required=True)
    parser.add_argument('--gpu_ids', type=str, default=None, help='which gpu to use')
    parser.add_argument('--which_model', type=str, required=True, help='which pretrained model')
    parser.add_argument('--pretrained', type=str, required=True, help='pretrained path')

    args = parser.parse_args()
    args, lg = test_parse(args)
    pn = 50
    half_pn = pn//2
    lg.info('\n' + '-'*pn + 'General INFO' + '-'*pn)

    # create test dataloader
    test_loader_list = []
    for i in range(len(args['dataset_list'])):
        # get single dataset and dataloader
        single_dataset_args = copy.deepcopy(args)
        single_dataset_args['datasets']['test']['dataroot_HR'] = single_dataset_args['datasets']['test']['dataroot_HR'][i]
        single_dataset_args['datasets']['test']['dataroot_LR'] = single_dataset_args['datasets']['test']['dataroot_LR'][i]
   
        test_dataset = create_dataset(single_dataset_args['datasets']['test'])
        test_loader = create_loader(test_dataset, args['datasets']['test'])
        test_loader_list.append(test_loader)

    # create model
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model = create_model(args['networks']).to(device)
    lg.info('Create model: [{}]'.format(args['networks']['which_model']))
    scale = args['scale']

    # calc number of parameters
    in_ = torch.randn(1, 3, round(720/scale), round(1280/scale)).to(device)
    params, GFlops = summary(model, in_)
    lg.info('Total parameters: [{:.3f}M], GFlops: [{:.4f}G]'.format(params / 1e6, GFlops / 1e9))

    # load pretrained model
    state_dict = torch.load(args['networks']['pretrained'])
    new_state_dict = {}
    for k, v in state_dict.items():
        if k[:7] == 'module.':
            new_state_dict[k[7:]] = v
        else:
            new_state_dict[k] = v
    model.load_state_dict(new_state_dict, strict=True)
    lg.info('Load pretrained from: [{}]'.format(args['networks']['pretrained']))

    for i, test_loader in enumerate(test_loader_list):
        dataset_name = args['dataset_list'][i]
        l = (12-len(dataset_name)) // 2
        e = len(dataset_name) % 2
        lg.info('\n\n' + '-'*(pn+l) + dataset_name + '-'*(pn+l+e))
        lg.info('Number of [{}] images: [{}]'.format(dataset_name, len(test_loader)))

        # calculate cuda time
        avg_test_time = 0.0
        if args['calc_cuda_time'] and 'Set5' in dataset_name:
            lg.info('Start calculating cuda time...')
            avg_test_time = calc_cuda_time(test_loader, model)
            lg.info('Average cuda time on [{}]: [{:.5f}ms]'.format(dataset_name, avg_test_time))
    
        # calucate psnr and ssim
        psnr_list = []
        ssim_list = []

        model.eval()
        for iter, data in enumerate(test_loader):
            lr = data['LR'].to(device)
            hr = data['HR']
        
            # calculate evaluation metrics
            with torch.no_grad():
                sr = model(lr)
            #save(args['networks']['which_model'], dataset_name, data['filename'][0], tensor2np(sr))
            psnr, ssim = calc_metrics(tensor2np(sr), tensor2np(hr), crop_border=scale, test_Y=True)
            psnr_list.append(psnr)
            ssim_list.append(ssim)

            lg.info('[{:03d}/{:03d}] || PSNR/SSIM: {:.2f}/{:.4f} || {}'.format(iter+1, len(test_loader), psnr, ssim, data['filename']))


        avg_psnr = sum(psnr_list) / len(psnr_list)
        avg_ssim = sum(ssim_list) / len(ssim_list)

        if avg_test_time > 0:
            lg.info('Average PSNR: {:.2f}  Average SSIM: {:.4f}, Average time: {:.5f}ms'.format(avg_psnr, avg_ssim, avg_test_time))
        else:
            lg.info('Average PSNR: {:.2f}  Average SSIM: {:.4f}'.format(avg_psnr, avg_ssim))
        
    lg.info('\n' + '-'*pn + '---Finish---' + '-'*pn)
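
Note: the 'module.' prefix stripping in the checkpoint-loading block can be written as a single dict comprehension; an equivalent sketch:

new_state_dict = {k[7:] if k.startswith('module.') else k: v
                  for k, v in state_dict.items()}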
Example No. 12
def main():
    args = parser.parse_args()

    if not args.checkpoint and not args.pretrained:
        args.pretrained = True

    amp_autocast = suppress  # do nothing
    if args.amp:
        if not has_native_amp:
            print(
                "Native Torch AMP is not available (requires torch >= 1.6), using FP32."
            )
        else:
            amp_autocast = torch.cuda.amp.autocast

    # create model
    model = geffnet.create_model(args.model,
                                 num_classes=args.num_classes,
                                 in_chans=3,
                                 pretrained=args.pretrained,
                                 checkpoint_path=args.checkpoint,
                                 scriptable=args.torchscript)

    if args.channels_last:
        model = model.to(memory_format=torch.channels_last)

    if args.torchscript:
        torch.jit.optimized_execution(True)
        model = torch.jit.script(model)

    print('Model %s created, param count: %d' %
          (args.model, sum([m.numel() for m in model.parameters()])))

    data_config = resolve_data_config(model, args)

    criterion = nn.CrossEntropyLoss()

    if not args.no_cuda:
        if args.num_gpu > 1:
            model = torch.nn.DataParallel(model,
                                          device_ids=list(range(
                                              args.num_gpu))).cuda()
        else:
            model = model.cuda()
        criterion = criterion.cuda()

    loader = create_loader(Dataset(args.data,
                                   load_bytes=args.tf_preprocessing),
                           input_size=data_config['input_size'],
                           batch_size=args.batch_size,
                           use_prefetcher=not args.no_cuda,
                           interpolation=data_config['interpolation'],
                           mean=data_config['mean'],
                           std=data_config['std'],
                           num_workers=args.workers,
                           crop_pct=data_config['crop_pct'],
                           tensorflow_preprocessing=args.tf_preprocessing)

    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    model.eval()
    end = time.time()
    with torch.no_grad():
        for i, (input, target) in enumerate(loader):
            if not args.no_cuda:
                target = target.cuda()
                input = input.cuda()
            if args.channels_last:
                input = input.contiguous(memory_format=torch.channels_last)

            # compute output
            with amp_autocast():
                output = model(input)
                loss = criterion(output, target)

            # measure accuracy and record loss
            prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
            losses.update(loss.item(), input.size(0))
            top1.update(prec1.item(), input.size(0))
            top5.update(prec5.item(), input.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % args.print_freq == 0:
                print(
                    'Test: [{0}/{1}]\t'
                    'Time {batch_time.val:.3f} ({batch_time.avg:.3f}, {rate_avg:.3f}/s) \t'
                    'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                    'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                    'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                        i,
                        len(loader),
                        batch_time=batch_time,
                        rate_avg=input.size(0) / batch_time.avg,
                        loss=losses,
                        top1=top1,
                        top5=top5))

    print(
        ' * Prec@1 {top1.avg:.3f} ({top1a:.3f}) Prec@5 {top5.avg:.3f} ({top5a:.3f})'
        .format(top1=top1,
                top1a=100 - top1.avg,
                top5=top5,
                top5a=100. - top5.avg))
Example No. 13
def main():
    args = parser.parse_args()

    args.prefetcher = not args.no_prefetcher
    args.distributed = False
    if 'WORLD_SIZE' in os.environ:
        args.distributed = int(os.environ['WORLD_SIZE']) > 1
        if args.distributed and args.num_gpu > 1:
            print(
                'Using more than one GPU per process in distributed mode is not allowed. Setting num_gpu to 1.'
            )
            args.num_gpu = 1

    args.device = 'cuda:0'
    args.world_size = 1
    r = -1
    if args.distributed:
        args.num_gpu = 1
        args.device = 'cuda:%d' % args.local_rank
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend='nccl',
                                             init_method='env://')
        args.world_size = torch.distributed.get_world_size()
        r = torch.distributed.get_rank()

    if args.distributed:
        print(
            'Training in distributed mode with multiple processes, 1 GPU per process. Process %d, total %d.'
            % (r, args.world_size))
    else:
        print('Training with a single process on %d GPUs.' % args.num_gpu)

    # FIXME seed handling for multi-process distributed?
    torch.manual_seed(args.seed)

    output_dir = ''
    if args.local_rank == 0:
        if args.output:
            output_base = args.output
        else:
            output_base = './output'
        exp_name = '-'.join([
            datetime.now().strftime("%Y%m%d-%H%M%S"), args.model,
            str(args.img_size)
        ])
        output_dir = get_outdir(output_base, 'train', exp_name)

    model = create_model(args.model,
                         pretrained=args.pretrained,
                         num_classes=args.num_classes,
                         drop_rate=args.drop,
                         global_pool=args.gp,
                         bn_tf=args.bn_tf,
                         bn_momentum=args.bn_momentum,
                         bn_eps=args.bn_eps,
                         checkpoint_path=args.initial_checkpoint)

    print('Model %s created, param count: %d' %
          (args.model, sum([m.numel() for m in model.parameters()])))

    data_config = resolve_data_config(model,
                                      args,
                                      verbose=args.local_rank == 0)

    # optionally resume from a checkpoint
    start_epoch = 0
    optimizer_state = None
    if args.resume:
        optimizer_state, start_epoch = resume_checkpoint(
            model, args.resume, args.start_epoch)

    if args.num_gpu > 1:
        if args.amp:
            print(
                'Warning: AMP does not work well with nn.DataParallel, disabling. '
                'Use distributed mode for multi-GPU AMP.')
            args.amp = False
        model = nn.DataParallel(model,
                                device_ids=list(range(args.num_gpu))).cuda()
    else:
        model.cuda()

    optimizer = create_optimizer(args, model)
    if optimizer_state is not None:
        optimizer.load_state_dict(optimizer_state)

    if has_apex and args.amp:
        model, optimizer = amp.initialize(model, optimizer, opt_level='O1')
        use_amp = True
        print('AMP enabled')
    else:
        use_amp = False
        print('AMP disabled')

    if args.distributed:
        model = DDP(model, delay_allreduce=True)

    lr_scheduler, num_epochs = create_scheduler(args, optimizer)
    if start_epoch > 0:
        lr_scheduler.step(start_epoch)
    if args.local_rank == 0:
        print('Scheduled epochs: ', num_epochs)

    train_dir = os.path.join(args.data, 'train')
    if not os.path.exists(train_dir):
        print('Error: training folder does not exist at: %s' % train_dir)
        exit(1)
    dataset_train = Dataset(train_dir)

    collate_fn = None
    if args.prefetcher and args.mixup > 0:
        collate_fn = FastCollateMixup(args.mixup, args.smoothing,
                                      args.num_classes)

    loader_train = create_loader(
        dataset_train,
        input_size=data_config['input_size'],
        batch_size=args.batch_size,
        is_training=True,
        use_prefetcher=args.prefetcher,
        rand_erase_prob=args.reprob,
        rand_erase_mode=args.remode,
        interpolation='random',  # FIXME cleanly resolve this? data_config['interpolation'],
        mean=data_config['mean'],
        std=data_config['std'],
        num_workers=args.workers,
        distributed=args.distributed,
        collate_fn=collate_fn,
    )

    eval_dir = os.path.join(args.data, 'validation')
    if not os.path.isdir(eval_dir):
        print('Error: validation folder does not exist at: %s' % eval_dir)
        exit(1)
    dataset_eval = Dataset(eval_dir)

    loader_eval = create_loader(
        dataset_eval,
        input_size=data_config['input_size'],
        batch_size=4 * args.batch_size,
        is_training=False,
        use_prefetcher=args.prefetcher,
        interpolation=data_config['interpolation'],
        mean=data_config['mean'],
        std=data_config['std'],
        num_workers=args.workers,
        distributed=args.distributed,
    )

    if args.mixup > 0.:
        # smoothing is handled with mixup label transform
        train_loss_fn = SoftTargetCrossEntropy().cuda()
        validate_loss_fn = nn.CrossEntropyLoss().cuda()
    elif args.smoothing:
        train_loss_fn = LabelSmoothingCrossEntropy(
            smoothing=args.smoothing).cuda()
        validate_loss_fn = nn.CrossEntropyLoss().cuda()
    else:
        train_loss_fn = nn.CrossEntropyLoss().cuda()
        validate_loss_fn = train_loss_fn

    eval_metric = args.eval_metric
    saver = None
    if output_dir:
        decreasing = True if eval_metric == 'loss' else False
        saver = CheckpointSaver(checkpoint_dir=output_dir,
                                decreasing=decreasing)
    best_metric = None
    best_epoch = None
    try:
        for epoch in range(start_epoch, num_epochs):
            if args.distributed:
                loader_train.sampler.set_epoch(epoch)

            train_metrics = train_epoch(epoch,
                                        model,
                                        loader_train,
                                        optimizer,
                                        train_loss_fn,
                                        args,
                                        lr_scheduler=lr_scheduler,
                                        saver=saver,
                                        output_dir=output_dir,
                                        use_amp=use_amp)

            eval_metrics = validate(model, loader_eval, validate_loss_fn, args)

            if lr_scheduler is not None:
                lr_scheduler.step(epoch, eval_metrics[eval_metric])

            update_summary(epoch,
                           train_metrics,
                           eval_metrics,
                           os.path.join(output_dir, 'summary.csv'),
                           write_header=best_metric is None)

            if saver is not None:
                # save proper checkpoint with eval metric
                best_metric, best_epoch = saver.save_checkpoint(
                    {
                        'epoch': epoch + 1,
                        'arch': args.model,
                        'state_dict': model.state_dict(),
                        'optimizer': optimizer.state_dict(),
                        'args': args,
                    },
                    epoch=epoch + 1,
                    metric=eval_metrics[eval_metric])

    except KeyboardInterrupt:
        pass
    if best_metric is not None:
        print('*** Best metric: {0} (epoch {1})'.format(
            best_metric, best_epoch))
Example No. 14
def main():
    setup_default_logging()
    args, args_text = _parse_args()

    args.pretrained_backbone = not args.no_pretrained_backbone
    args.prefetcher = not args.no_prefetcher
    args.distributed = False
    if 'WORLD_SIZE' in os.environ:
        args.distributed = int(os.environ['WORLD_SIZE']) > 1
    args.device = 'cuda:0'
    args.world_size = 1
    args.rank = 0  # global rank
    if args.distributed:
        args.device = 'cuda:%d' % args.local_rank
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend='nccl',
                                             init_method='env://')
        args.world_size = torch.distributed.get_world_size()
        args.rank = torch.distributed.get_rank()
    assert args.rank >= 0

    if args.distributed:
        logging.info(
            'Training in distributed mode with multiple processes, 1 GPU per process. Process %d, total %d.'
            % (args.rank, args.world_size))
    else:
        logging.info('Training with a single process on 1 GPU.')

    torch.manual_seed(args.seed + args.rank)

    model = create_model(
        args.model,
        bench_task='train',
        pretrained=args.pretrained,
        pretrained_backbone=args.pretrained_backbone,
        redundant_bias=args.redundant_bias,
        checkpoint_path=args.initial_checkpoint,
    )
    # FIXME decide which args to keep and overlay on config / pass to backbone
    #     num_classes=args.num_classes,
    #     drop_rate=args.drop,
    #     drop_path_rate=args.drop_path,
    #     drop_block_rate=args.drop_block,
    input_size = model.config.image_size

    if args.local_rank == 0:
        logging.info('Model %s created, param count: %d' %
                     (args.model, sum([m.numel()
                                       for m in model.parameters()])))

    model.cuda()
    optimizer = create_optimizer(args, model)
    use_amp = False
    if has_apex and args.amp:
        model, optimizer = amp.initialize(model, optimizer, opt_level='O1')
        use_amp = True
    if args.local_rank == 0:
        logging.info('NVIDIA APEX {}. AMP {}.'.format(
            'installed' if has_apex else 'not installed',
            'on' if use_amp else 'off'))

    # optionally resume from a checkpoint
    resume_state = {}
    resume_epoch = None
    if args.resume:
        resume_state, resume_epoch = resume_checkpoint(unwrap_bench(model),
                                                       args.resume)
    if resume_state and not args.no_resume_opt:
        if 'optimizer' in resume_state:
            if args.local_rank == 0:
                logging.info('Restoring Optimizer state from checkpoint')
            optimizer.load_state_dict(resume_state['optimizer'])
        if use_amp and 'amp' in resume_state and 'load_state_dict' in amp.__dict__:
            if args.local_rank == 0:
                logging.info('Restoring NVIDIA AMP state from checkpoint')
            amp.load_state_dict(resume_state['amp'])
    del resume_state

    model_ema = None
    if args.model_ema:
        # Important to create EMA model after cuda(), DP wrapper, and AMP but before SyncBN and DDP wrapper
        model_ema = ModelEma(model, decay=args.model_ema_decay)
        #resume=args.resume)  # FIXME bit of a mess with bench
        if args.resume:
            load_checkpoint(unwrap_bench(model_ema), args.resume, use_ema=True)

    if args.distributed:
        if args.sync_bn:
            try:
                if has_apex:
                    model = convert_syncbn_model(model)
                else:
                    model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(
                        model)
                if args.local_rank == 0:
                    logging.info(
                        'Converted model to use Synchronized BatchNorm. WARNING: You may have issues if using '
                        'zero initialized BN layers (enabled by default for ResNets) while sync-bn enabled.'
                    )
            except Exception as e:
                logging.error(
                    'Failed to enable Synchronized BatchNorm. Install Apex or Torch >= 1.1'
                )
        if has_apex:
            model = DDP(model, delay_allreduce=True)
        else:
            if args.local_rank == 0:
                logging.info(
                    "Using torch DistributedDataParallel. Install NVIDIA Apex for Apex DDP."
                )
            model = DDP(model, device_ids=[args.local_rank])  # can use device str in Torch >= 1.1
        # NOTE: EMA model does not need to be wrapped by DDP

    lr_scheduler, num_epochs = create_scheduler(args, optimizer)
    start_epoch = 0
    if args.start_epoch is not None:
        # a specified start_epoch will always override the resume epoch
        start_epoch = args.start_epoch
    elif resume_epoch is not None:
        start_epoch = resume_epoch
    if lr_scheduler is not None and start_epoch > 0:
        lr_scheduler.step(start_epoch)

    if args.local_rank == 0:
        logging.info('Scheduled epochs: {}'.format(num_epochs))

    # train_anno_set = 'train2017'
    # train_annotation_path = os.path.join(args.data, 'annotations', f'instances_{train_anno_set}.json')
    # train_image_dir = train_anno_set
    #dataset_train = CocoDetection("/workspace/data/images",
    #                            "/workspace/data/datatrain90n.json")
    train_anno_set = 'train'
    train_annotation_path = os.path.join(args.data, 'annotations',
                                         f'instances_{train_anno_set}.json')
    train_image_dir = train_anno_set
    dataset_train = CocoDetection(os.path.join(args.data, train_image_dir),
                                  train_annotation_path)
    # FIXME cutmix/mixup worth investigating?
    # collate_fn = None
    # if args.prefetcher and args.mixup > 0:
    #     collate_fn = FastCollateMixup(args.mixup, args.smoothing, args.num_classes)

    loader_train = create_loader(
        dataset_train,
        input_size=input_size,
        batch_size=args.batch_size,
        is_training=True,
        use_prefetcher=args.prefetcher,
        #re_prob=args.reprob,  # FIXME add back various augmentations
        #re_mode=args.remode,
        #re_count=args.recount,
        #re_split=args.resplit,
        #color_jitter=args.color_jitter,
        #auto_augment=args.aa,
        interpolation=args.train_interpolation,
        mean=[0.4533, 0.4744,
              0.4722],  #[0.4846, 0.5079, 0.5005],#[0.485, 0.456, 0.406],
        std=[0.2823, 0.2890,
             0.3084],  #[0.2687, 0.2705, 0.2869],#[0.485, 0.456, 0.406],
        num_workers=args.workers,
        distributed=args.distributed,
        #collate_fn=collate_fn,
        pin_mem=args.pin_mem,
    )
    train_anno_set = 'val'
    train_annotation_path = os.path.join(args.data, 'annotations',
                                         f'instances_{train_anno_set}.json')
    train_image_dir = train_anno_set
    dataset_eval = CocoDetection(os.path.join(args.data, train_image_dir),
                                 train_annotation_path)
    # train_anno_set = 'val'
    # train_annotation_path = os.path.join(args.data, 'annotations', f'instances_{train_anno_set}.json')
    # train_image_dir = train_anno_set
    #   dataset_eval = CocoDetection("/workspace/data/val/images",
    #                               "/workspace/data/dataval90n.json")
    loader_eval = create_loader(
        dataset_eval,
        input_size=input_size,
        batch_size=args.validation_batch_size_multiplier * args.batch_size,
        is_training=False,
        use_prefetcher=args.prefetcher,
        interpolation=args.interpolation,
        mean=[0.4535, 0.4744, 0.4724],  #[0.4851, 0.5083, 0.5009],
        std=[0.2835, 0.2903, 0.3098],  #[0.2690, 0.2709, 0.2877],
        num_workers=args.workers,
        #distributed=args.distributed,
        pin_mem=args.pin_mem,
    )

    # for xx,item in dataset_train :
    #     print("out",type(xx))

    #     break

    # exit()
    array_of_gt = []
    if args.local_rank == 0:
        for _, item in tqdm(dataset_eval):
            # print(item)
            for i in range(len(item['cls'])):
                array_of_gt.append(
                    BoundingBox(imageName=str(item["img_id"]),
                                classId=item["cls"][i],
                                x=item["bbox"][i][1] * item['img_scale'],
                                y=item["bbox"][i][0] * item['img_scale'],
                                w=item["bbox"][i][3] * item['img_scale'],
                                h=item["bbox"][i][2] * item['img_scale'],
                                typeCoordinates=CoordinatesType.Absolute,
                                bbType=BBType.GroundTruth,
                                format=BBFormat.XYX2Y2,
                                imgSize=(item['img_size'][0],
                                         item['img_size'][1])))

    evaluator = COCOEvaluator(dataset_eval.coco,
                              distributed=args.distributed,
                              gtboxes=array_of_gt)

    eval_metric = args.eval_metric
    best_metric = None
    best_epoch = None
    saver = None
    output_dir = ''
    if args.local_rank == 0:
        output_base = args.output if args.output else './output'
        exp_name = '-'.join(
            [datetime.now().strftime("%Y%m%d-%H%M%S"), args.model])
        output_dir = get_outdir(output_base, 'train', exp_name)
        decreasing = True if eval_metric == 'loss' else False
        saver = CheckpointSaver(checkpoint_dir=output_dir,
                                decreasing=decreasing)
        with open(os.path.join(output_dir, 'args.yaml'), 'w') as f:
            f.write(args_text)


#     print(model)
    try:
        for epoch in range(start_epoch, num_epochs):
            if args.distributed:
                loader_train.sampler.set_epoch(epoch)

            train_metrics = train_epoch(epoch,
                                        model,
                                        loader_train,
                                        optimizer,
                                        args,
                                        lr_scheduler=lr_scheduler,
                                        saver=saver,
                                        output_dir=output_dir,
                                        use_amp=use_amp,
                                        model_ema=model_ema)

            if args.distributed and args.dist_bn in ('broadcast', 'reduce'):
                if args.local_rank == 0:
                    logging.info(
                        "Distributing BatchNorm running means and vars")
                distribute_bn(model, args.world_size, args.dist_bn == 'reduce')

            # the overhead of evaluating with coco style datasets is fairly high, so just ema or non, not both
            if model_ema is not None:
                if args.distributed and args.dist_bn in ('broadcast',
                                                         'reduce'):
                    distribute_bn(model_ema, args.world_size,
                                  args.dist_bn == 'reduce')

                eval_metrics = validate(model_ema.ema,
                                        loader_eval,
                                        args,
                                        evaluator,
                                        log_suffix=' (EMA)')
            else:
                eval_metrics = validate(model, loader_eval, args, evaluator)

            if lr_scheduler is not None:
                # step LR for next epoch
                lr_scheduler.step(epoch + 1, eval_metrics[eval_metric])

            if saver is not None:
                update_summary(epoch,
                               train_metrics,
                               eval_metrics,
                               os.path.join(output_dir, 'summary.csv'),
                               write_header=best_metric is None)

                # save proper checkpoint with eval metric
                save_metric = eval_metrics[eval_metric]
                best_metric, best_epoch = saver.save_checkpoint(
                    unwrap_bench(model),
                    optimizer,
                    args,
                    epoch=epoch,
                    model_ema=unwrap_bench(model_ema),
                    metric=save_metric,
                    use_amp=use_amp)

    except KeyboardInterrupt:
        pass
    if best_metric is not None:
        logging.info('*** Best metric: {0} (epoch {1})'.format(
            best_metric, best_epoch))
Example No. 15
def main():
    if not torch.cuda.is_available():
        logging.info('No GPU device available')
        sys.exit(1)
    np.random.seed(args.seed)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info("args = %s", args)
    logging.info("unparsed_args = %s", unparsed)
    num_gpus = torch.cuda.device_count()
    genotype = eval("genotypes.%s" % args.arch)
    print('---------Genotype---------')
    logging.info(genotype)
    print('--------------------------')
    model = Network(args.init_channels, CLASSES, args.layers, args.auxiliary,
                    genotype, args.use_dropout)

    if num_gpus > 1:
        model = nn.DataParallel(model)
        model = model.cuda()
    else:
        model = model.cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    if args.load_path is not None:
        pretrained_dict = torch.load(os.path.expanduser(args.load_path),
                                     map_location=lambda storage, loc: storage)
        model_dict = model.state_dict()  # get the model's parameter dict
        model_dict.update(pretrained_dict['state_dict'])  # shared keys must have matching tensor shapes
        model.load_state_dict(model_dict)  # update the model weights
    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    criterion_smooth = CrossEntropyLabelSmooth(CLASSES, args.label_smooth)
    criterion_smooth = criterion_smooth.cuda()

    optimizer = torch.optim.SGD(model.parameters(),
                                args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    dataset_train = ImageList(root=args.root_path,
                              fileList=args.root_path + '/' + args.trainFile)
    collate_fn = None
    if args.prefetcher and args.mixup > 0:
        collate_fn = FastCollateMixup(args.mixup, args.smoothing,
                                      args.num_classes)
    num_train = len(dataset_train)
    train_queue = create_loader(
        dataset_train,  # here call the transform implicitly
        input_size=144,
        batch_size=args.batch_size,
        is_training=True,
        use_prefetcher=args.prefetcher,
        rand_erase_prob=args.reprob,
        rand_erase_mode=args.remode,
        color_jitter=args.color_jitter,
        interpolation='random',  # FIXME cleanly resolve this? data_config['interpolation'],
        # mean=data_config['mean'],
        # std=data_config['std'],
        num_workers=args.workers,
        collate_fn=collate_fn,
        auto_augment=args.aa,
        use_aug=True)

    # scheduler = torch.optim.lr_scheduler.StepLR(optimizer, args.decay_period, gamma=args.gamma)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs))
    best_LFWACC = 0
    lr = args.learning_rate
    writer = SummaryWriter(os.path.join(args.save, 'tensorboard'),
                           comment='pcdarts-for-LFW')
    for epoch in range(args.epochs):
        if args.lr_scheduler == 'cosine':
            scheduler.step()
            current_lr = scheduler.get_lr()[0]
        elif args.lr_scheduler == 'linear':
            current_lr = adjust_lr(optimizer, epoch)
        else:
            print('Wrong lr type, exit')
            sys.exit(1)
        logging.info('Epoch: %d lr %e', epoch, current_lr)
        if epoch < 5 and args.load_path is None:  # and args.batch_size > 256:
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr * (epoch + 1) / 5.0
                current_lr = lr * (epoch + 1) / 5.0
            logging.info('Warming-up Epoch: %d, LR: %e', epoch, current_lr)
        if num_gpus > 1:
            model.module.drop_path_prob = args.drop_path_prob * epoch / args.epochs
        else:
            model.drop_path_prob = args.drop_path_prob * epoch / args.epochs
        epoch_start = time.time()
        train_acc, train_obj = train(train_queue, model, criterion_smooth,
                                     optimizer)
        logging.info('Train_acc: %f', train_acc)
        writer.add_scalar('lr', current_lr, epoch)
        writer.add_scalar('train_acc', train_acc, epoch)
        writer.add_scalar('train_loss', train_obj, epoch)

        if (epoch >= 50 and
            (epoch + 1) % 2 == 0) or args.load_path is not None:
            # valid_acc, valid_obj = infer(valid_queue, model, criterion)
            # logging.info('valid_acc %f', valid_acc)
            LFWACC, std, thd = lfw_eval(args, model)
            logging.info('lfw_eval LFW_ACC:%f LFW_std:%f LFW_thd:%f', LFWACC,
                         std, thd)
            writer.add_scalar('LFW_ACC', LFWACC, epoch)
            writer.add_scalar('LFW_std', std, epoch)
            writer.add_scalar('LFW_thd', thd, epoch)
            is_best = False
            if LFWACC >= best_LFWACC:
                best_LFWACC = LFWACC
                is_best = True
                logging.info('lfw_eval BEST_LFW_ACC:%f', best_LFWACC)
            utils.save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'state_dict': model.state_dict(),
                    'best_LFWACC': best_LFWACC,
                    'optimizer': optimizer.state_dict(),
                }, is_best, args.save)
    writer.close()
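
Note: adjust_lr (used by the 'linear' scheduler branch) is not shown. A minimal sketch, assuming a linear decay of args.learning_rate over args.epochs:

def adjust_lr(optimizer, epoch):
    # Hypothetical linear decay matching the 'linear' branch above.
    lr = args.learning_rate * (1.0 - epoch / args.epochs)
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
    return lr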
Example No. 16
def main():
  if not torch.cuda.is_available():
    logging.info('no gpu device available')
    sys.exit(1)

  np.random.seed(args.seed)
  torch.cuda.set_device(args.gpu)
  cudnn.benchmark = True
  torch.manual_seed(args.seed)
  cudnn.enabled = True
  torch.cuda.manual_seed(args.seed)
  logging.info('gpu device = %d' % args.gpu)
  logging.info("args = %s", args)

  criterion = nn.CrossEntropyLoss()
  criterion = criterion.cuda()
  model = Network(args.init_channels, CIFAR_CLASSES, args.layers, criterion, args.dropout_type)
  model = model.cuda()
  logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

  optimizer = torch.optim.SGD(
      model.parameters(),
      args.learning_rate,
      momentum=args.momentum,
      weight_decay=args.weight_decay)

  # train_transform, valid_transform = utils._data_transforms_cifar10(args)
  # if args.set=='cifar100':
  #     train_data = dset.CIFAR100(root=args.data, train=True, download=False, transform=train_transform)
  # else:
  #     train_data = dset.CIFAR10(root=args.data, train=True, download=False, transform=train_transform)

  dataset_train = ImageList(root=args.data, fileList=os.path.join(args.data, args.trainFile))

  collate_fn = None
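  # when the prefetcher is active, mixup is folded into the collate function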
  if args.prefetcher and args.mixup > 0:
    collate_fn = FastCollateMixup(args.mixup, args.smoothing, args.num_classes)
  num_train = len(dataset_train)
  # indices = list(range(num_train))
  # randint(args.data_portion * num_train)

  indices = np.linspace(0, num_train - 1, int(args.data_portion * num_train), dtype=int)
  np.random.shuffle(indices)
  num_train = len(indices)
  # patch = int(np.floor(args.data_portion * num_train))
  split = int(np.floor(args.train_portion * num_train))
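  # indices[:split] drive the weight updates; indices[split:] form the held-out queue used by the architect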

  train_queue = create_loader(
    dataset_train,  # here call the transform implicitly
    input_size=144,
    batch_size=args.batch_size,
    is_training=True,
    use_prefetcher=args.prefetcher,
    rand_erase_prob=args.reprob,
    rand_erase_mode=args.remode,
    color_jitter=args.color_jitter,
    interpolation='random',  # FIXME cleanly resolve this? data_config['interpolation'],
    # mean=data_config['mean'],
    # std=data_config['std'],
    num_workers=args.workers,
    sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
    collate_fn=collate_fn,
    auto_augment=args.aa
  )
  valid_queue = create_loader(
    dataset_train,  # here call the transform implicitly
    input_size=144,
    batch_size=args.batch_size,
    is_training=True,
    use_prefetcher=args.prefetcher,
    rand_erase_prob=args.reprob,
    rand_erase_mode=args.remode,
    color_jitter=args.color_jitter,
    interpolation='random',  # FIXME cleanly resolve this? data_config['interpolation'],
    # mean=data_config['mean'],
    # std=data_config['std'],
    num_workers=args.workers,
    sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:num_train]),
    collate_fn=collate_fn,
    auto_augment=args.aa
  )

  scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs), eta_min=args.learning_rate_min)

  architect = Architect(model, args)
  writer = SummaryWriter(os.path.join(args.save, 'tensorboard'))
  for epoch in range(args.epochs):
    scheduler.step()
    lr = scheduler.get_lr()[0]
    logging.info('epoch %d lr %e', epoch, lr)

    genotype = model.genotype()
    logging.info('genotype = %s', genotype)

    # training
    train_acc, train_obj = train(train_queue, valid_queue, model, architect,
                                 criterion, optimizer, lr, epoch, args.arch_epoch, writer)
    logging.info('train_acc %f', train_acc)
    writer.add_scalar('lr', lr, epoch)
    writer.add_scalar('train_acc', train_acc, epoch)
    writer.add_scalar('train_loss', train_obj, epoch)
    # validation
    # if (epoch+1) >= args.valid_epoch and (epoch+1) % 5 == 0:
    #   LFWACC, std, thd = lfw_eval(args, model)
    #   logging.info('lfw_eval LFW_ACC:%f LFW_std:%f LFW_thd:%f', LFWACC,std,thd)
    #   writer.add_scalar('LFW_ACC', LFWACC, epoch)
    #   writer.add_scalar('LFW_std', std, epoch)
    #   writer.add_scalar('LFW_thd', thd, epoch)

    # utils.save(model, os.path.join(args.save, 'weights.pt'))
  writer.close()
Exemplo n.º 17
0
def main():
    args = parser.parse_args()
    args.gpu_id = 0

    model = model_helper.ModelHelper(name="validation_net", init_params=False)

    # Bring in the init net from init_net.pb
    init_net_proto = caffe2_pb2.NetDef()
    with open(args.c2_init, "rb") as f:
        init_net_proto.ParseFromString(f.read())
    model.param_init_net = core.Net(
        init_net_proto
    )  # model.param_init_net.AppendNet(core.Net(init_net_proto)) #

    # bring in the predict net from predict_net.pb
    predict_net_proto = caffe2_pb2.NetDef()
    with open(args.c2_predict, "rb") as f:
        predict_net_proto.ParseFromString(f.read())
    model.net = core.Net(
        predict_net_proto)  # model.net.AppendNet(core.Net(predict_net_proto))

    data_config = resolve_data_config(args.model, args)
    loader = create_loader(Dataset(args.data,
                                   load_bytes=args.tf_preprocessing),
                           input_size=data_config['input_size'],
                           batch_size=args.batch_size,
                           use_prefetcher=False,
                           interpolation=data_config['interpolation'],
                           mean=data_config['mean'],
                           std=data_config['std'],
                           num_workers=args.workers,
                           crop_pct=data_config['crop_pct'],
                           tensorflow_preprocessing=args.tf_preprocessing)

    # this is so obvious, wonderful interface </sarcasm>
    input_blob = model.net.external_inputs[0]
    output_blob = model.net.external_outputs[0]

    if True:
        device_opts = None
    else:
        # CUDA is crashing, no idea why, awesome error message, give it a try for kicks
        device_opts = core.DeviceOption(caffe2_pb2.PROTO_CUDA, args.gpu_id)
        model.net.RunAllOnGPU(gpu_id=args.gpu_id, use_cudnn=True)
        model.param_init_net.RunAllOnGPU(gpu_id=args.gpu_id, use_cudnn=True)

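    # seed the input blob with random data so the net can be created with a concrete input shape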
    model.param_init_net.GaussianFill([],
                                      input_blob.GetUnscopedName(),
                                      shape=(1, ) + data_config['input_size'],
                                      mean=0.0,
                                      std=1.0)
    workspace.RunNetOnce(model.param_init_net)
    workspace.CreateNet(model.net, overwrite=True)

    batch_time = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    end = time.time()
    for i, (input, target) in enumerate(loader):
        # run the net and return prediction
        caffe2_in = input.data.numpy()
        workspace.FeedBlob(input_blob, caffe2_in, device_opts)
        workspace.RunNet(model.net, num_iter=1)
        output = workspace.FetchBlob(output_blob)

        # measure accuracy and record loss
        prec1, prec5 = accuracy_np(output.data, target.numpy())
        top1.update(prec1.item(), input.size(0))
        top5.update(prec5.item(), input.size(0))

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            print(
                'Test: [{0}/{1}]\t'
                'Time {batch_time.val:.3f} ({batch_time.avg:.3f}, {rate_avg:.3f}/s, {ms_avg:.3f} ms/sample) \t'
                'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                    i,
                    len(loader),
                    batch_time=batch_time,
                    rate_avg=input.size(0) / batch_time.avg,
                    ms_avg=1000 * batch_time.avg / input.size(0),
                    top1=top1,
                    top5=top5))

    print(
        ' * Prec@1 {top1.avg:.3f} ({top1a:.3f}) Prec@5 {top5.avg:.3f} ({top5a:.3f})'
        .format(top1=top1,
                top1a=100 - top1.avg,
                top5=top5,
                top5a=100. - top5.avg))
Exemplo n.º 18
0
import time
from train_arguments import Arguments
from data import create_loader
from model import create_model
from utils.general import Display

args = Arguments().parse()
data_loader = create_loader(args)
dataset = data_loader.load_data()
dataset_size = len(data_loader)

nl = '\n'
print(
    f'There are a total number of {dataset_size} sequences of {args.num_frames} frames in the training set.{nl}'
)

model = create_model(args)
model.set_up(args)
display = Display(args)
global_step = 0
total_steps = 0

print(f'Training has begun!{nl}')

for epoch in range(0, args.num_epochs):
    data_time_start = time.time()

    for j, data in enumerate(data_loader):
        processing_time_start = time.time()

        if global_step % args.print_freq == 0:
Exemplo n.º 19
0
def main():
    args = parser.parse_args()

    # create model
    model = create_model(args.model,
                         num_classes=args.num_classes,
                         in_chans=3,
                         pretrained=args.pretrained,
                         checkpoint_path=args.checkpoint)

    print('Model %s created, param count: %d' %
          (args.model, sum([m.numel() for m in model.parameters()])))

    data_config = resolve_data_config(model, args)
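    # test-time pooling lets the model run at a larger input size; crop_pct is forced to 1.0 below when it is active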
    model, test_time_pool = apply_test_time_pool(model, data_config, args)

    if args.num_gpu > 1:
        model = torch.nn.DataParallel(model,
                                      device_ids=list(range(
                                          args.num_gpu))).cuda()
    else:
        model = model.cuda()

    criterion = nn.CrossEntropyLoss().cuda()

    loader = create_loader(
        Dataset(args.data),
        input_size=data_config['input_size'],
        batch_size=args.batch_size,
        use_prefetcher=True,
        interpolation=data_config['interpolation'],
        mean=data_config['mean'],
        std=data_config['std'],
        num_workers=args.workers,
        crop_pct=1.0 if test_time_pool else data_config['crop_pct'])

    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    model.eval()
    end = time.time()
    with torch.no_grad():
        for i, (input, target) in enumerate(loader):
            target = target.cuda()
            input = input.cuda()

            # compute output
            output = model(input)
            loss = criterion(output, target)

            # measure accuracy and record loss
            prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
            losses.update(loss.item(), input.size(0))
            top1.update(prec1.item(), input.size(0))
            top5.update(prec5.item(), input.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % args.print_freq == 0:
                print(
                    'Test: [{0}/{1}]\t'
                    'Time {batch_time.val:.3f} ({batch_time.avg:.3f}, {rate_avg:.3f}/s) \t'
                    'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                    'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                    'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                        i,
                        len(loader),
                        batch_time=batch_time,
                        rate_avg=input.size(0) / batch_time.avg,
                        loss=losses,
                        top1=top1,
                        top5=top5))

    print(
        ' * Prec@1 {top1.avg:.3f} ({top1a:.3f}) Prec@5 {top5.avg:.3f} ({top5a:.3f})'
        .format(top1=top1,
                top1a=100 - top1.avg,
                top5=top5,
                top5a=100. - top5.avg))
Exemplo n.º 20
0
                                            pred.detach().cpu().numpy())

    print("log: eval()")
    print("loss: ", loss.item(), "ap: ", ap, " map: ", map, " f1: ", f1)


if __name__ == '__main__':
    args = parser.parse_args()

    # dataset
    print("log: evaluation.py __main__ enter")
    print("Loading data...")

    if args.stgcn:
        print("Evaluating stgcn")
        val_loader = create_loader(args)
        stgcn_model = load_stgcn(STGCN_WEIGHTS_PATH)
        eval(stgcn_model, val_loader)

    elif args.dgnn:
        print("Evaluating dgnn")
        val_loader = create_loader(args)
        dgnn_model = load_dgnn(DGNN_WEIGHTS_PATH)
        eval(dgnn_model, val_loader, use_bones=True)

    elif args.lstm:
        print("Evaluating lstm")
        subprocess.call(["python3", "lstm/main.py"])

    elif args.step:
        print("Evaluating step")
Exemplo n.º 21
0
def main():
    
    # file and stream logger
    log_path = 'log/logger_info.log'
    lg = logger('Base', log_path)
    pn = 40
    print('\n','-'*pn, 'General INFO', '-'*pn)

    # setting arguments
    parser = argparse.ArgumentParser(description='Test arguments')
    parser.add_argument('--opt', type=str, required=True, help='path to test yaml file')
    parser.add_argument('--dataset_name', type=str, default=None)
    parser.add_argument('--scale', type=int, required=True)
    parser.add_argument('--which_model', type=str, required=True, help='which pretrained model')
    parser.add_argument('--pretrained', type=str, required=True, help='pretrain path')

    args = parser.parse_args()
    args = test_parse(args, lg)
    
    # create test dataloader
    test_dataset = create_dataset(args['datasets']['test'])
    test_loader = create_loader(test_dataset, args['datasets']['test'])
    lg.info('\nHR root: [{}]\nLR root: [{}]'.format(args['datasets']['test']['dataroot_HR'], args['datasets']['test']['dataroot_LR']))
    lg.info('Number of test images: [{}]'.format(len(test_dataset)))

    # create model
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model = create_model(args['networks']).to(device)
    lg.info('Create model: [{}]'.format(args['networks']['which_model']))
    scale = args['scale']
    state_dict = torch.load(args['networks']['pretrained'])
    lg.info('Load pretrained from: [{}]'.format(args['networks']['pretrained']))
    model.load_state_dict(state_dict)
    
    # calculate cuda time
    if args['calc_cuda_time']:
        lg.info('Start calculating cuda time...')
        avg_test_time = calc_cuda_time(test_loader, model)
        lg.info('Average cuda time: [{:.5f}]'.format(avg_test_time))
    
    # Test
    print('\n', '-'*pn, 'Testing {}'.format(args['dataset_name']), '-'*pn)
    #pbar = ProgressBar(len(test_loader))
    psnr_list = []
    ssim_list = []
    time_list = []

    for iter, data in enumerate(test_loader):
        lr = data['LR'].to(device)
        hr = data['HR']
        
        # calculate evaluation metrics
        sr = model(lr)
        psnr, ssim = calc_metrics(tensor2np(sr), tensor2np(hr), crop_border=scale, test_Y=True)
        psnr_list.append(psnr)
        ssim_list.append(ssim)

        #pbar.update('')
        print('[{:03d}/{:03d}] || PSNR/SSIM: {:.2f}/{:.4f} || {}'.format(iter+1, len(test_loader), psnr, ssim, data['filename']))
    
    avg_psnr = sum(psnr_list) / len(psnr_list)
    avg_ssim = sum(ssim_list) / len(ssim_list)

    print('\n','-'*pn, 'Summary', '-'*pn)
    print('Average PSNR: {:.2f}  Average SSIM: {:.4f}'.format(avg_psnr, avg_ssim))


    print('\n','-'*pn, 'Finish', '-'*pn)
Exemplo n.º 22
0
def validate(args):
    # might as well try to validate something
    args.pretrained = args.pretrained or not args.checkpoint
    args.prefetcher = not args.no_prefetcher

    # create model
    config = get_efficientdet_config(args.model)
    model = EfficientDet(config)
    if args.checkpoint:
        load_checkpoint(model, args.checkpoint)

    param_count = sum([m.numel() for m in model.parameters()])
    logging.info('Model %s created, param count: %d' %
                 (args.model, param_count))

    bench = DetBenchEval(model, config)

    bench.model = bench.model.cuda()
    if has_amp:
        bench.model = amp.initialize(bench.model, opt_level='O1')

    if args.num_gpu > 1:
        bench.model = torch.nn.DataParallel(bench.model,
                                            device_ids=list(range(
                                                args.num_gpu)))

    if 'test' in args.anno:
        annotation_path = os.path.join(args.data, 'annotations',
                                       f'image_info_{args.anno}.json')
        image_dir = 'test2017'
    else:
        annotation_path = os.path.join(args.data, 'annotations',
                                       f'instances_{args.anno}.json')
        image_dir = args.anno
    dataset = CocoDetection(os.path.join(args.data, image_dir),
                            annotation_path)

    loader = create_loader(dataset,
                           input_size=config.image_size,
                           batch_size=args.batch_size,
                           use_prefetcher=args.prefetcher,
                           interpolation=args.interpolation,
                           num_workers=args.workers)

    img_ids = []
    results = []
    model.eval()
    batch_time = AverageMeter()
    end = time.time()
    with torch.no_grad():
        for i, (input, target) in enumerate(loader):
            output = bench(input, target['img_id'], target['scale'])
            for batch_out in output:
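                # each detection row is [img_id, x, y, w, h, score, class], as indexed below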
                for det in batch_out:
                    image_id = int(det[0])
                    score = float(det[5])
                    coco_det = {
                        'image_id': image_id,
                        'bbox': det[1:5].tolist(),
                        'score': score,
                        'category_id': int(det[6]),
                    }
                    img_ids.append(image_id)
                    results.append(coco_det)

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % args.log_freq == 0:
                print(
                    'Test: [{0:>4d}/{1}]  '
                    'Time: {batch_time.val:.3f}s ({batch_time.avg:.3f}s, {rate_avg:>7.2f}/s)  '
                    .format(
                        i,
                        len(loader),
                        batch_time=batch_time,
                        rate_avg=input.size(0) / batch_time.avg,
                    ))

    json.dump(results, open(args.results, 'w'), indent=4)
    if 'test' not in args.anno:
        coco_results = dataset.coco.loadRes(args.results)
        coco_eval = COCOeval(dataset.coco, coco_results, 'bbox')
        coco_eval.params.imgIds = img_ids  # score only ids we've used
        coco_eval.evaluate()
        coco_eval.accumulate()
        coco_eval.summarize()

    return results
Exemplo n.º 23
0
def validate(args):
    setup_dllogger(0, filename=args.dllogger_file)

    if args.checkpoint != '':
        args.pretrained = True
    args.prefetcher = not args.no_prefetcher
    if args.waymo:
        assert args.waymo_val is not None

    memory_format = (torch.channels_last if args.memory_format == "nhwc" else
                     torch.contiguous_format)
    args.distributed = False
    if 'WORLD_SIZE' in os.environ:
        args.distributed = int(os.environ['WORLD_SIZE']) > 1
    args.device = 'cuda:0'
    args.world_size = 1
    args.rank = 0  # global rank
    if args.distributed:
        torch.cuda.manual_seed_all(args.seed)
        args.device = 'cuda:%d' % args.local_rank
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend='nccl',
                                             init_method='env://')
        args.world_size = torch.distributed.get_world_size()
        args.rank = torch.distributed.get_rank()

        # Set device limit on the current device
        # cudaLimitMaxL2FetchGranularity = 0x05
        pValue = ctypes.cast((ctypes.c_int * 1)(),
                             ctypes.POINTER(ctypes.c_int))
        _libcudart.cudaDeviceSetLimit(ctypes.c_int(0x05), ctypes.c_int(128))
        _libcudart.cudaDeviceGetLimit(pValue, ctypes.c_int(0x05))
        assert pValue.contents.value == 128
    assert args.rank >= 0

    # create model
    bench = create_model(args.model,
                         input_size=args.input_size,
                         num_classes=args.num_classes,
                         bench_task='predict',
                         pretrained=args.pretrained,
                         redundant_bias=args.redundant_bias,
                         checkpoint_path=args.checkpoint,
                         checkpoint_ema=args.use_ema,
                         soft_nms=args.use_soft_nms,
                         strict_load=False)
    input_size = bench.config.image_size
    data_config = bench.config

    param_count = sum([m.numel() for m in bench.parameters()])
    print('Model %s created, param count: %d' % (args.model, param_count))

    bench = bench.cuda().to(memory_format=memory_format)

    if args.distributed > 1:  # args.distributed is a bool, so this guard and the DDP wrap below never execute
        raise ValueError(
            "Evaluation is supported only on single GPU. args.num_gpu must be 1"
        )
        bench = DDP(
            bench, device_ids=[args.device]
        )  # torch.nn.DataParallel(bench, device_ids=list(range(args.num_gpu)))

    if args.waymo:
        annotation_path = args.waymo_val_annotation
        image_dir = args.waymo_val
    else:
        if 'test' in args.anno:
            annotation_path = os.path.join(args.data, 'annotations',
                                           f'image_info_{args.anno}.json')
            image_dir = 'test2017'
        else:
            annotation_path = os.path.join(args.data, 'annotations',
                                           f'instances_{args.anno}.json')
            image_dir = args.anno
    dataset = CocoDetection(os.path.join(args.data, image_dir),
                            annotation_path, data_config)

    evaluator = COCOEvaluator(dataset.coco,
                              distributed=args.distributed,
                              waymo=args.waymo)

    loader = create_loader(dataset,
                           input_size=input_size,
                           batch_size=args.batch_size,
                           use_prefetcher=args.prefetcher,
                           interpolation=args.interpolation,
                           fill_color=args.fill_color,
                           num_workers=args.workers,
                           distributed=args.distributed,
                           pin_mem=args.pin_mem,
                           memory_format=memory_format)

    img_ids = []
    results = []
    dllogger_metric = {}
    bench.eval()
    batch_time = AverageMeter()
    throughput = AverageMeter()
    end = time.time()
    total_time_start = time.time()
    with torch.no_grad():
        for i, (input, target) in enumerate(loader):
            with torch.cuda.amp.autocast(enabled=args.amp):
                output = bench(input, target['img_scale'], target['img_size'])
            batch_time.update(time.time() - end)
            throughput.update(input.size(0) / batch_time.val)
            evaluator.add_predictions(output, target)
            torch.cuda.synchronize()

            # reset the meters after the first 10 iterations so the averages exclude warm-up
            if i == 9:
                batch_time.reset()
                throughput.reset()

            if args.rank == 0 and i % args.log_freq == 0:
                print(
                    'Test: [{0:>4d}/{1}]  '
                    'Time: {batch_time.val:.3f}s ({batch_time.avg:.3f}s, {rate_avg:>7.2f}/s)  '
                    .format(
                        i,
                        len(loader),
                        batch_time=batch_time,
                        rate_avg=input.size(0) / batch_time.avg,
                    ))
            end = time.time()

    dllogger_metric['total_inference_time'] = time.time() - total_time_start
    dllogger_metric['inference_throughput'] = throughput.avg
    dllogger_metric['inference_time'] = 1000 / throughput.avg
    total_time_start = time.time()
    mean_ap = 0.
    if not args.inference:
        if 'test' not in args.anno:
            mean_ap = evaluator.evaluate()
        else:
            evaluator.save_predictions(args.results)
        dllogger_metric['map'] = mean_ap
        dllogger_metric['total_eval_time'] = time.time() - total_time_start
    else:
        evaluator.save_predictions(args.results)

    if not args.distributed or args.rank == 0:
        dllogger.log(step=(), data=dllogger_metric, verbosity=0)

    return results
Exemplo n.º 24
0
def validate(args):
    setup_default_logging()

    def setthresh():
        key = args.checkpoint.split("/")[-1].split("_")[0]
        if key in getthresholds:
            return getthresholds[key]
        return [args.threshold] * 4

    # might as well try to validate something
    args.pretrained = args.pretrained or not args.checkpoint
    args.prefetcher = not args.no_prefetcher

    # create model
    bench = create_model(
        args.model,
        bench_task='predict',
        pretrained=args.pretrained,
        redundant_bias=args.redundant_bias,
        checkpoint_path=args.checkpoint,
        checkpoint_ema=args.use_ema,
    )
    input_size = bench.config.image_size

    param_count = sum([m.numel() for m in bench.parameters()])
    print('Model %s created, param count: %d' % (args.model, param_count))

    bench = bench.cuda()
    if has_amp:
        print('Using AMP mixed precision.')
        bench = amp.initialize(bench, opt_level='O1')
    else:
        print('AMP not installed, running network in FP32.')

    if args.num_gpu > 1:
        bench = torch.nn.DataParallel(bench,
                                      device_ids=list(range(args.num_gpu)))

    if 'test' in args.anno:
        annotation_path = os.path.join(args.data, 'annotations',
                                       f'image_info_{args.anno}.json')
        image_dir = args.anno
    elif 'val' in args.anno:
        annotation_path = os.path.join(args.data, 'annotations',
                                       f'instances_{args.anno}.json')
        image_dir = args.anno
    # else:
    #     annotation_path = os.path.join(args.data, f'{args.anno}.json')
    #     image_dir = args.anno
    print(os.path.join(args.data, image_dir), annotation_path)
    dataset = CocoDetection(os.path.join(args.data, image_dir),
                            annotation_path)

    loader = create_loader(dataset,
                           input_size=input_size,
                           batch_size=args.batch_size,
                           use_prefetcher=args.prefetcher,
                           interpolation=args.interpolation,
                           fill_color=args.fill_color,
                           num_workers=args.workers,
                           pin_mem=args.pin_mem,
                           mean=args.mean,
                           std=args.std)

    if 'test' in args.anno:
        threshold = float(args.threshold)
    # elif 'detector' in args.anno:

    #     threshold = min(getthresholds['d0'])
    else:
        threshold = .001
    img_ids = []
    results = []
    writetofilearrtay = []
    bench.eval()
    batch_time = AverageMeter()
    end = time.time()
    with torch.no_grad():
        for i, (input, target) in enumerate(loader):
            output = bench(input, target['img_scale'], target['img_size'])
            output = output.cpu()
            # print(target['img_id'])
            sample_ids = target['img_id'].cpu()

            for index, sample in enumerate(output):
                image_id = int(sample_ids[index])
                # if 'test' in args.anno :
                #     tempWritetoFile = []
                #     tempWritetoFile.append(getimageNamefromid(image_id))

                for det in sample:
                    score = float(det[4])
                    if score < threshold:  # stop when below this threshold, scores in descending order
                        coco_det = dict(image_id=image_id, category_id=-1)
                        img_ids.append(image_id)
                        results.append(coco_det)
                        break
                    coco_det = dict(image_id=image_id,
                                    bbox=det[0:4].tolist(),
                                    score=score,
                                    category_id=int(det[5]),
                                    sizes=target['img_size'].tolist()[0])
                    img_ids.append(image_id)
                    results.append(coco_det)

            # exit()

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % args.log_freq == 0:
                print(
                    'Test: [{0:>4d}/{1}]  '
                    'Time: {batch_time.val:.3f}s ({batch_time.avg:.3f}s, {rate_avg:>7.2f}/s)  '
                    .format(
                        i,
                        len(loader),
                        batch_time=batch_time,
                        rate_avg=input.size(0) / batch_time.avg,
                    ))

    # if 'test' in args.anno :
    if not os.path.exists(args.tosave):
        os.makedirs(args.tosave)
    from itertools import groupby
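    # groupby only merges consecutive items, so sort detections by image_id first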
    results.sort(key=lambda x: x['image_id'])

    count = 0
    for k, v in tqdm(groupby(results, key=lambda x: x['image_id'])):
        # print(args.data +"/" + str(getimageNamefromid(k)))
        img = drawonimage(
            os.path.join(args.data, image_dir, str(getimageNamefromid(k))), v,
            setthresh())
        cv2.imwrite(args.tosave + "/" + str(getimageNamefromid(k)), img)
        count += 1
        # print(i['category_id']," ",i['bbox'][0]," ",i['bbox'][1]," ",i['bbox'][2]," ",i['bbox'][3]," ")
    print("generated predictions for ", count, " images.")

    return results
Exemplo n.º 25
0
def main():
    setup_default_logging()
    args, args_text = _parse_args()

    args.prefetcher = not args.no_prefetcher
    args.distributed = False
    if 'WORLD_SIZE' in os.environ:
        args.distributed = int(os.environ['WORLD_SIZE']) > 1
    args.device = 'cuda:0'
    args.world_size = 1
    args.rank = 0  # global rank
    if args.distributed:
        args.device = 'cuda:%d' % args.local_rank
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend='nccl',
                                             init_method='env://')
        args.world_size = torch.distributed.get_world_size()
        args.rank = torch.distributed.get_rank()
    assert args.rank >= 0

    if args.distributed:
        logging.info(
            'Training in distributed mode with multiple processes, 1 GPU per process. Process %d, total %d.'
            % (args.rank, args.world_size))
    else:
        logging.info('Training with a single process on 1 GPU.')

    #torch.manual_seed(args.seed + args.rank)

    # create model
    config = get_efficientdet_config(args.model)
    config.redundant_bias = args.redundant_bias  # redundant conv + BN bias layers (True to match official models)
    model = EfficientDet(config)
    if args.initial_checkpoint:
        load_checkpoint(model, args.initial_checkpoint)
    config.num_classes = 5
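    # swap in a 5-class classification head: 9 anchors per location x 5 classes output channels,
    # with the bias initialised to the focal-loss prior (pi = 0.01)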
    model.class_net.predict.conv_pw = create_conv2d(config.fpn_channels,
                                                    9 * 5,
                                                    1,
                                                    padding=config.pad_type,
                                                    bias=True)
    variance_scaling(model.class_net.predict.conv_pw.weight)
    model.class_net.predict.conv_pw.bias.data.fill_(-math.log((1 - 0.01) /
                                                              0.01))
    model = DetBenchTrain(model, config)

    model.cuda()
    print(model.model.class_net.predict.conv_pw)
    # FIXME create model factory, pretrained zoo
    # model = create_model(
    #     args.model,
    #     pretrained=args.pretrained,
    #     num_classes=args.num_classes,
    #     drop_rate=args.drop,
    #     drop_connect_rate=args.drop_connect,  # DEPRECATED, use drop_path
    #     drop_path_rate=args.drop_path,
    #     drop_block_rate=args.drop_block,
    #     global_pool=args.gp,
    #     bn_tf=args.bn_tf,
    #     bn_momentum=args.bn_momentum,
    #     bn_eps=args.bn_eps,
    #     checkpoint_path=args.initial_checkpoint)

    if args.local_rank == 0:
        logging.info('Model %s created, param count: %d' %
                     (args.model, sum([m.numel()
                                       for m in model.parameters()])))

    optimizer = create_optimizer(args, model)
    use_amp = False
    if has_apex and args.amp:
        model, optimizer = amp.initialize(model, optimizer, opt_level='O1')
        use_amp = True
    if args.local_rank == 0:
        logging.info('NVIDIA APEX {}. AMP {}.'.format(
            'installed' if has_apex else 'not installed',
            'on' if use_amp else 'off'))

    # optionally resume from a checkpoint
    resume_state = {}
    resume_epoch = None
    if args.resume:
        resume_state, resume_epoch = resume_checkpoint(_unwrap_bench(model),
                                                       args.resume)
    if resume_state and not args.no_resume_opt:
        if 'optimizer' in resume_state:
            if args.local_rank == 0:
                logging.info('Restoring Optimizer state from checkpoint')
            optimizer.load_state_dict(resume_state['optimizer'])
        if use_amp and 'amp' in resume_state and 'load_state_dict' in amp.__dict__:
            if args.local_rank == 0:
                logging.info('Restoring NVIDIA AMP state from checkpoint')
            amp.load_state_dict(resume_state['amp'])
    del resume_state

    model_ema = None
    if args.model_ema:
        # Important to create EMA model after cuda(), DP wrapper, and AMP but before SyncBN and DDP wrapper
        model_ema = ModelEma(model,
                             decay=args.model_ema_decay,
                             device='cpu' if args.model_ema_force_cpu else '')
        #resume=args.resume)  # FIXME bit of a mess with bench
        if args.resume:
            load_checkpoint(_unwrap_bench(model_ema),
                            args.resume,
                            use_ema=True)

    if args.distributed:
        if args.sync_bn:
            try:
                if has_apex:
                    model = convert_syncbn_model(model)
                else:
                    model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(
                        model)
                if args.local_rank == 0:
                    logging.info(
                        'Converted model to use Synchronized BatchNorm. WARNING: You may have issues if using '
                        'zero initialized BN layers (enabled by default for ResNets) while sync-bn enabled.'
                    )
            except Exception as e:
                logging.error(
                    'Failed to enable Synchronized BatchNorm. Install Apex or Torch >= 1.1'
                )
        if has_apex:
            model = DDP(model, delay_allreduce=True)
        else:
            if args.local_rank == 0:
                logging.info(
                    "Using torch DistributedDataParallel. Install NVIDIA Apex for Apex DDP."
                )
            model = DDP(model,
                        device_ids=[args.local_rank
                                    ])  # can use device str in Torch >= 1.1
        # NOTE: EMA model does not need to be wrapped by DDP
    lr_scheduler, num_epochs = create_scheduler(args, optimizer)
    start_epoch = 0
    if args.start_epoch is not None:
        # a specified start_epoch will always override the resume epoch
        start_epoch = args.start_epoch
    elif resume_epoch is not None:
        start_epoch = resume_epoch
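    # fast-forward the scheduler so the learning rate matches the resume epoch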
    if lr_scheduler is not None and start_epoch > 0:
        lr_scheduler.step(start_epoch)

    if args.local_rank == 0:
        logging.info('Scheduled epochs: {}'.format(num_epochs))

    train_anno_set = 'train_small'
    train_annotation_path = os.path.join(args.data, 'annotations_small',
                                         'train_annotations.json')
    train_image_dir = train_anno_set
    dataset_train = CocoDetection(os.path.join(args.data, train_image_dir),
                                  train_annotation_path)

    # FIXME cutmix/mixup worth investigating?
    # collate_fn = None
    # if args.prefetcher and args.mixup > 0:
    #     collate_fn = FastCollateMixup(args.mixup, args.smoothing, args.num_classes)

    loader_train = create_loader(
        dataset_train,
        input_size=config.image_size,
        batch_size=args.batch_size,
        is_training=True,
        use_prefetcher=args.prefetcher,
        #re_prob=args.reprob,  # FIXME add back various augmentations
        #re_mode=args.remode,
        #re_count=args.recount,
        #re_split=args.resplit,
        #color_jitter=args.color_jitter,
        #auto_augment=args.aa,
        interpolation=args.train_interpolation,
        #mean=data_config['mean'],
        #std=data_config['std'],
        num_workers=args.workers,
        distributed=args.distributed,
        #collate_fn=collate_fn,
        pin_mem=args.pin_mem,
    )

    #train_anno_set = 'valid_small'
    #train_annotation_path = os.path.join(args.data, 'annotations_small', f'valid_annotations.json')
    #train_image_dir = train_anno_set
    dataset_eval = CocoDetection(os.path.join(args.data, train_image_dir),
                                 train_annotation_path)

    loader_eval = create_loader(
        dataset_eval,
        input_size=config.image_size,
        batch_size=args.validation_batch_size_multiplier * args.batch_size,
        is_training=False,
        use_prefetcher=args.prefetcher,
        interpolation=args.interpolation,
        #mean=data_config['mean'],
        #std=data_config['std'],
        num_workers=args.workers,
        #distributed=args.distributed,
        pin_mem=args.pin_mem,
    )

    evaluator = COCOEvaluator(dataset_eval.coco, distributed=args.distributed)

    eval_metric = args.eval_metric
    best_metric = None
    best_epoch = None
    saver = None
    output_dir = ''
    if args.local_rank == 0:
        output_base = args.output if args.output else './output'
        exp_name = '-'.join(
            [datetime.now().strftime("%Y%m%d-%H%M%S"), args.model])
        output_dir = get_outdir(output_base, 'train', exp_name)
        decreasing = True if eval_metric == 'loss' else False
        saver = CheckpointSaver(checkpoint_dir=output_dir,
                                decreasing=decreasing)
        with open(os.path.join(output_dir, 'args.yaml'), 'w') as f:
            f.write(args_text)

    try:
        for epoch in range(start_epoch, num_epochs):
            if args.distributed:
                loader_train.sampler.set_epoch(epoch)

            train_metrics = train_epoch(epoch,
                                        model,
                                        loader_train,
                                        optimizer,
                                        args,
                                        lr_scheduler=lr_scheduler,
                                        saver=saver,
                                        output_dir=output_dir,
                                        use_amp=use_amp,
                                        model_ema=model_ema)

            if args.distributed and args.dist_bn in ('broadcast', 'reduce'):
                if args.local_rank == 0:
                    logging.info(
                        "Distributing BatchNorm running means and vars")
                distribute_bn(model, args.world_size, args.dist_bn == 'reduce')

            eval_metrics = validate(model, loader_eval, args, evaluator)

            if model_ema is not None and not args.model_ema_force_cpu:
                if args.distributed and args.dist_bn in ('broadcast',
                                                         'reduce'):
                    distribute_bn(model_ema, args.world_size,
                                  args.dist_bn == 'reduce')

                ema_eval_metrics = validate(model_ema.ema,
                                            loader_eval,
                                            args,
                                            log_suffix=' (EMA)')
                eval_metrics = ema_eval_metrics

            if lr_scheduler is not None:
                # step LR for next epoch
                lr_scheduler.step(epoch + 1, eval_metrics[eval_metric])

            if saver is not None:
                update_summary(epoch,
                               train_metrics,
                               eval_metrics,
                               os.path.join(output_dir, 'summary.csv'),
                               write_header=best_metric is None)

                # save proper checkpoint with eval metric
                save_metric = eval_metrics[eval_metric]
                best_metric, best_epoch = saver.save_checkpoint(
                    _unwrap_bench(model),
                    optimizer,
                    args,
                    epoch=epoch,
                    model_ema=_unwrap_bench(model_ema),
                    metric=save_metric,
                    use_amp=use_amp)

    except KeyboardInterrupt:
        pass
    if best_metric is not None:
        logging.info('*** Best metric: {0} (epoch {1})'.format(
            best_metric, best_epoch))
Exemplo n.º 26
0
def main():
    args = parser.parse_args()
    print(args)

    if args.img_size is None:
        args.img_size, args.crop_pct = get_image_size_crop_pct(args.model)

    if not args.checkpoint and not args.pretrained:
        args.pretrained = True

    if args.torchscript:
        geffnet.config.set_scriptable(True)

    # create model
    model = geffnet.create_model(args.model,
                                 num_classes=args.num_classes,
                                 in_chans=3,
                                 pretrained=args.pretrained,
                                 checkpoint_path=args.checkpoint)

    if args.torchscript:
        torch.jit.optimized_execution(True)
        model = torch.jit.script(model)

    print('Model %s created, param count: %d' %
          (args.model, sum([m.numel() for m in model.parameters()])))

    data_config = resolve_data_config(model, args)

    criterion = nn.CrossEntropyLoss()

    if not args.no_cuda:
        if args.num_gpu > 1:
            model = torch.nn.DataParallel(model,
                                          device_ids=list(range(
                                              args.num_gpu))).cuda()
        else:
            model = model.cuda()
        criterion = criterion.cuda()

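    # optional LPOT tuning pass: fuse modules, quantize the model, and exit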
    if args.tune:
        model.eval()
        model.fuse_model()
        conf_yaml = "conf_" + args.model + ".yaml"
        from lpot import Quantization
        quantizer = Quantization(conf_yaml)
        q_model = quantizer(model)
        exit(0)

    valdir = os.path.join(args.data, 'val')
    loader = create_loader(Dataset(valdir, load_bytes=args.tf_preprocessing),
                           input_size=data_config['input_size'],
                           batch_size=args.batch_size,
                           use_prefetcher=not args.no_cuda,
                           interpolation=data_config['interpolation'],
                           mean=data_config['mean'],
                           std=data_config['std'],
                           num_workers=args.workers,
                           crop_pct=data_config['crop_pct'],
                           tensorflow_preprocessing=args.tf_preprocessing)

    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    model.eval()
    model.fuse_model()
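    # load the INT8 model produced by the LPOT tuning step when args.int8 is set; otherwise keep the fused FP32 model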
    if args.int8:
        from lpot.utils.pytorch import load
        new_model = load(
            os.path.abspath(os.path.expanduser(args.tuned_checkpoint)), model)
    else:
        new_model = model

    with torch.no_grad():
        for i, (input, target) in enumerate(loader):
            if i >= args.warmup_iterations:
                start = time.time()
            if not args.no_cuda:
                target = target.cuda()
                input = input.cuda()

            # compute output
            output = new_model(input)
            loss = criterion(output, target)

            # measure accuracy and record loss
            prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
            losses.update(loss.item(), input.size(0))
            top1.update(prec1.item(), input.size(0))
            top5.update(prec5.item(), input.size(0))

            if i >= args.warmup_iterations:
                # measure elapsed time
                batch_time.update(time.time() - start)

            if i % args.print_freq == 0:
                print(
                    'Test: [{0}/{1}]\t'
                    'Time {batch_time.val:.3f} ({batch_time.avg:.3f}, {rate_avg:.3f}/s) \t'
                    'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                    'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                    'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                        i,
                        len(loader),
                        batch_time=batch_time,
                        rate_avg=input.size(0) / batch_time.avg,
                        loss=losses,
                        top1=top1,
                        top5=top5))
            if args.iterations > 0 and i >= args.iterations + args.warmup_iterations - 1:
                break

        print('Batch size = %d' % args.batch_size)
        if args.batch_size == 1:
            print('Latency: %.3f ms' % (batch_time.avg * 1000))
        print('Throughput: %.3f images/sec' %
              (args.batch_size / batch_time.avg))
        print('Accuracy: {top1:.5f} Accuracy@5 {top5:.5f}'.format(
            top1=(top1.avg / 100), top5=(top5.avg / 100)))
Exemplo n.º 27
0
def validate(args):
    # might as well try to validate something
    args.pretrained = args.pretrained or not args.checkpoint
    args.prefetcher = not args.no_prefetcher
    args.redundant_bias = not args.no_redundant_bias

    # create model
    config = get_efficientdet_config(args.model)
    config.redundant_bias = args.redundant_bias
    model = EfficientDet(config)
    if args.checkpoint:
        load_checkpoint(model, args.checkpoint)

    param_count = sum([m.numel() for m in model.parameters()])
    print('Model %s created, param count: %d' % (args.model, param_count))

    bench = DetBenchEval(model, config)
    bench = bench.cuda()
    if has_amp:
        print('Using AMP mixed precision.')
        bench = amp.initialize(bench, opt_level='O1')
    else:
        print('AMP not installed, running network in FP32.')

    if args.num_gpu > 1:
        bench = torch.nn.DataParallel(bench,
                                      device_ids=list(range(args.num_gpu)))

    if 'test' in args.anno:
        annotation_path = os.path.join(args.data, 'annotations',
                                       f'image_info_{args.anno}.json')
        image_dir = 'test2017'
    else:
        annotation_path = os.path.join(args.data, 'annotations',
                                       f'instances_{args.anno}.json')
        image_dir = args.anno
    dataset = CocoDetection(os.path.join(args.data, image_dir),
                            annotation_path)

    loader = create_loader(dataset,
                           input_size=config.image_size,
                           batch_size=args.batch_size,
                           use_prefetcher=args.prefetcher,
                           interpolation=args.interpolation,
                           fill_color=args.fill_color,
                           num_workers=args.workers,
                           pin_mem=args.pin_mem)

    img_ids = []
    results = []
    model.eval()
    batch_time = AverageMeter()
    end = time.time()
    with torch.no_grad():
        for i, (input, target) in enumerate(loader):
            output = bench(input, target['scale'])
            output = output.cpu()
            sample_ids = target['img_id'].cpu()
            for index, sample in enumerate(output):
                image_id = int(sample_ids[index])
                for det in sample:
                    score = float(det[4])
                    if score < .001:  # stop when below this threshold, scores in descending order
                        break
                    coco_det = dict(image_id=image_id,
                                    bbox=det[0:4].tolist(),
                                    score=score,
                                    category_id=int(det[5]))
                    img_ids.append(image_id)
                    results.append(coco_det)

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % args.log_freq == 0:
                print(
                    'Test: [{0:>4d}/{1}]  '
                    'Time: {batch_time.val:.3f}s ({batch_time.avg:.3f}s, {rate_avg:>7.2f}/s)  '
                    .format(
                        i,
                        len(loader),
                        batch_time=batch_time,
                        rate_avg=input.size(0) / batch_time.avg,
                    ))

    json.dump(results, open(args.results, 'w'), indent=4)
    if 'test' not in args.anno:
        coco_results = dataset.coco.loadRes(args.results)
        coco_eval = COCOeval(dataset.coco, coco_results, 'bbox')
        coco_eval.params.imgIds = img_ids  # score only ids we've used
        coco_eval.evaluate()
        coco_eval.accumulate()
        coco_eval.summarize()

    return results
Exemplo n.º 28
0
def eval_model(model_name, paper_model_name, paper_arxiv_id, batch_size=64, model_description=''):

    # create model
    bench = create_model(
        model_name,
        bench_task='predict',
        pretrained=True,
    )
    bench.eval()
    input_size = bench.config.image_size

    param_count = sum([m.numel() for m in bench.parameters()])
    print('Model %s created, param count: %d' % (model_name, param_count))

    bench = bench.cuda()
    if has_amp:
        print('Using AMP mixed precision.')
        bench = amp.initialize(bench, opt_level='O1')
    else:
        print('AMP not installed, running network in FP32.')

    annotation_path = os.path.join(DATA_ROOT, 'annotations', f'instances_{ANNO_SET}.json')
    evaluator = COCOEvaluator(
        root=DATA_ROOT,
        model_name=paper_model_name,
        model_description=model_description,
        paper_arxiv_id=paper_arxiv_id)

    dataset = CocoDetection(os.path.join(DATA_ROOT, ANNO_SET), annotation_path)

    loader = create_loader(
        dataset,
        input_size=input_size,
        batch_size=batch_size,
        use_prefetcher=True,
        fill_color='mean',
        num_workers=4,
        pin_mem=True)

    iterator = tqdm.tqdm(loader, desc="Evaluation", mininterval=5)
    evaluator.reset_time()

    with torch.no_grad():
        for i, (input, target) in enumerate(iterator):
            output = bench(input, target['img_scale'], target['img_size'])
            output = output.cpu()
            sample_ids = target['img_id'].cpu()
            results = []
            for index, sample in enumerate(output):
                image_id = int(sample_ids[index])
                for det in sample:
                    score = float(det[4])
                    if score < .001:  # stop when below this threshold, scores in descending order
                        break
                    coco_det = dict(
                        image_id=image_id,
                        bbox=det[0:4].tolist(),
                        score=score,
                        category_id=int(det[5]))
                    results.append(coco_det)
            evaluator.add(results)

            if evaluator.cache_exists:
                break

    evaluator.save()
Exemplo n.º 29
0
def main():
    args = parser.parse_args()

    # create model
    model = create_model(
        args.model,
        num_classes=args.num_classes,
        in_chans=3,
        pretrained=args.pretrained,
        checkpoint_path=args.checkpoint)

    print('Model %s created, param count: %d' %
          (args.model, sum([m.numel() for m in model.parameters()])))

    config = resolve_data_config(model, args)
    model, test_time_pool = apply_test_time_pool(model, config, args)

    if args.num_gpu > 1:
        model = torch.nn.DataParallel(model, device_ids=list(range(args.num_gpu))).cuda()
    else:
        model = model.cuda()

    loader = create_loader(
        Dataset(args.data),
        input_size=config['input_size'],
        batch_size=args.batch_size,
        use_prefetcher=True,
        interpolation=config['interpolation'],
        mean=config['mean'],
        std=config['std'],
        num_workers=args.workers,
        crop_pct=1.0 if test_time_pool else config['crop_pct'])

    model.eval()

    k = min(args.topk, args.num_classes)
    batch_time = AverageMeter()
    end = time.time()
    topk_ids = []
    with torch.no_grad():
        for batch_idx, (input, _) in enumerate(loader):
            input = input.cuda()
            labels = model(input)
            topk = labels.topk(k)[1]
            topk_ids.append(topk.cpu().numpy())

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if batch_idx % args.print_freq == 0:
                print('Predict: [{0}/{1}]\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})'.format(
                    batch_idx, len(loader), batch_time=batch_time))

    topk_ids = np.concatenate(topk_ids, axis=0).squeeze()

    with open(os.path.join(args.output_dir, 'topk_ids.csv'), 'w') as out_file:
        filenames = loader.dataset.filenames()
        for filename, label in zip(filenames, topk_ids):
            filename = os.path.basename(filename)
            out_file.write('{0},{1},{2},{3},{4},{5}\n'.format(
                filename, label[0], label[1], label[2], label[3], label[4]))
Exemplo n.º 30
0
def validate(args):
    setup_default_logging()

    # might as well try to validate something
    args.pretrained = args.pretrained or not args.checkpoint
    args.prefetcher = not args.no_prefetcher

    # create model
    bench = create_model(args.model,
                         bench_task='predict',
                         pretrained=args.pretrained,
                         redundant_bias=args.redundant_bias,
                         checkpoint_path=args.checkpoint,
                         checkpoint_ema=args.use_ema)
    input_size = bench.config.image_size

    param_count = sum([m.numel() for m in bench.parameters()])
    print('Model %s created, param count: %d' % (args.model, param_count))

    bench = bench.cuda()
    if has_amp:
        print('Using AMP mixed precision.')
        bench = amp.initialize(bench, opt_level='O1')
    else:
        print('AMP not installed, running network in FP32.')

    if args.num_gpu > 1:
        bench = torch.nn.DataParallel(bench,
                                      device_ids=list(range(args.num_gpu)))

    if 'test' in args.anno:
        annotation_path = os.path.join(args.data, 'annotations',
                                       f'image_info_{args.anno}.json')
        image_dir = args.anno
    else:
        annotation_path = os.path.join(args.data, 'annotations',
                                       f'instances_{args.anno}.json')
        image_dir = args.anno
    dataset = CocoDetection(os.path.join(args.data, image_dir),
                            annotation_path)

    loader = create_loader(dataset,
                           input_size=input_size,
                           batch_size=args.batch_size,
                           use_prefetcher=args.prefetcher,
                           interpolation=args.interpolation,
                           fill_color=args.fill_color,
                           num_workers=args.workers,
                           mean=args.mean,
                           std=args.std,
                           pin_mem=args.pin_mem)

    img_ids = []
    results = []
    bench.eval()

    for i, (input, target) in enumerate(loader, 1):
        dumm_inp = input
        tisc = target['img_scale']
        tisz = target['img_size']
        break

    starter, ender = torch.cuda.Event(enable_timing=True), torch.cuda.Event(
        enable_timing=True)
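    # CUDA events time the GPU work itself, avoiding host-side launch overhead in the measurement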
    # repetitions = 300
    # timings=np.zeros((repetitions,1))
    #GPU-WARM-UP
    # print(enumerate())
    for _ in range(10):
        _ = bench(dumm_inp, tisc, tisz)
    # MEASURE PERFORMANCE

    # dummy_input = torch.randn(1, 3,bench.config.image_size,bench.config.image_size,dtype=torch.float).to("cuda")
    print("starting")
    batch_time = AverageMeter()
    # end = time.time()
    with torch.no_grad():
        for rep in range(2000):
            starter.record()
            _ = bench(dumm_inp, tisc, tisz)
            ender.record()
            # wait for the GPU to finish so the event timer (milliseconds) is valid
            torch.cuda.synchronize()
            curr_time = starter.elapsed_time(ender)
            batch_time.update(curr_time)

            print(
                'Test: [{0:>4d}/{1}]  '
                'Time: {batch_time.val:.3f}ms ({batch_time.avg:.3f}ms, {rate_avg:>7.2f}/s)  '
                .format(
                    rep + 1,
                    2000,
                    batch_time=batch_time,
                    rate_avg=dumm_inp.size(0) / (batch_time.avg / 1000.0),
                ))

    # json.dump(results, open(args.results, 'w'), indent=4)
    # if 'test' not in args.anno:
    #     coco_results = dataset.coco.loadRes(args.results)
    #     coco_eval = COCOeval(dataset.coco, coco_results, 'bbox')
    #     coco_eval.params.imgIds = img_ids  # score only ids we've used
    #     coco_eval.evaluate()
    #     coco_eval.accumulate()
    #     coco_eval.summarize()

    return results