Exemplo n.º 1
0
def main(img_path, base_name, checkpoint_path):
    ori_imgs, framed_imgs, framed_metas = preprocess(img_path, max_size=input_size)

    if use_cuda:
        x = torch.stack([torch.from_numpy(fi).cuda() for fi in framed_imgs], 0)
    else:
        x = torch.stack([torch.from_numpy(fi) for fi in framed_imgs], 0)

    x = x.to(torch.float32 if not use_float16 else torch.float16).permute(0, 3, 1, 2)

    model = EfficientDetBackbone(compound_coef=compound_coef, num_classes=len(obj_list),
                                ratios=anchor_ratios, scales=anchor_scales)
    # model.load_state_dict(torch.load(f'weights/efficientdet-d{compound_coef}.pth'))
    model.load_state_dict(torch.load(checkpoint_path))
    model.requires_grad_(False)
    model.eval()

    if use_cuda:
        model = model.cuda()
    if use_float16:
        model = model.half()

    with torch.no_grad():
        features, regression, classification, anchors = model(x)

        regressBoxes = BBoxTransform()
        clipBoxes = ClipBoxes()

        out = postprocess(x,
                        anchors, regression, classification,
                        regressBoxes, clipBoxes,
                        threshold, iou_threshold)

    out = invert_affine(framed_metas, out)
    display(out, ori_imgs, base_name,imshow=False, imwrite=True)
Exemplo n.º 2
0
def show(args):
    input_size = input_sizes[args.compound_coef]
    ori_imgs, framed_imgs, framed_metas = eval_preprocess(args.img_path, max_size=input_size)
    if args.use_cuda:
        x = torch.stack([torch.from_numpy(fi).cuda() for fi in framed_imgs], 0)
    else:
        x = torch.stack([torch.from_numpy(fi) for fi in framed_imgs], 0)

    x = x.to(torch.float32).permute(0, 3, 1, 2)

    model = EfficientDetBackbone(compound_coef=args.compound_coef, num_classes=len(obj_list),
                                 ratios=anchor_ratios, scales=anchor_scales)

    model.load_state_dict(torch.load(args.pth, map_location='cpu'))
    model.requires_grad_(False)
    model.eval()
    if args.use_cuda:
        model = model.cuda(device=args.device)

    with torch.no_grad():
        features, regression, classification, anchors = model(x)

        regressBoxes = Rotation_BBoxTransform()
        clipBoxes = ClipBoxes()
        addBoxes = BBoxAddScores()
        out = postprocess(x,
                          anchors, regression, classification,
                          regressBoxes, clipBoxes, addBoxes,
                          args.score_threshold, args.iou_threshold)

        out = invert_affine(framed_metas, out)
        display(out, ori_imgs, imshow=True, imwrite=False)
Exemplo n.º 3
0
def main():
    anchor_ratios = [(1.0, 1.0), (1.4, 0.7), (0.7, 1.4)]
    anchor_scales = [2**0, 2**(1.0 / 3.0), 2**(2.0 / 3.0)]
    input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536]

    print('Loading all models in memory ... ')

    models = []
    for cp_coef in range(8):
        model = EfficientDetBackbone(compound_coef=cp_coef,
                                     num_classes=90,
                                     ratios=anchor_ratios,
                                     scales=anchor_scales)
        model.load_state_dict(
            torch.load(f'weights/efficientdet-d{cp_coef}.pth',
                       map_location='cpu'))
        model.requires_grad_(False)
        model.eval()
        model = model.cuda()
        models.append(model)

    param = []
    for i, m in enumerate(models):
        if i not in PICK_MODELS:
            continue
        print()
        print('Model: d' + str(i), '>>>')
        dim = input_sizes[i]
        size = (3, dim, dim)
        run_inf(m, size, i, start_bs=128)

    for i, m in enumerate(models):
        out = m(torch.randn(1, 3, input_sizes[0], input_sizes[0]).cuda())
Exemplo n.º 4
0
def main():
    anchor_ratios = [(1.0, 1.0), (1.4, 0.7), (0.7, 1.4)]
    anchor_scales = [2**0, 2**(1.0 / 3.0), 2**(2.0 / 3.0)]
    input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536]

    print('Single models in memory ... ')
    for cp_coef in PICK_MODELS:
        print()
        print('Model: d' + str(cp_coef), '>>>')
        start = time.perf_counter()
        model = EfficientDetBackbone(compound_coef=cp_coef,
                                     num_classes=90,
                                     ratios=anchor_ratios,
                                     scales=anchor_scales)
        model.load_state_dict(
            torch.load(f'weights/efficientdet-d{cp_coef}.pth',
                       map_location='cpu'))
        model.requires_grad_(False)
        model.eval()
        model = model.cpu()
        print('Loading(s): {:.2f}'.format(time.perf_counter() - start))

        dim = input_sizes[cp_coef]
        size = (3, dim, dim)

        run_inf(model, size, cp_coef, start_bs=1)
def read_images():
    for filename in os.listdir(imgfile_path):
        ori_imgs, framed_imgs, framed_metas = preprocess(os.path.join(
            imgfile_path, filename),
                                                         max_size=input_size)
        if use_cuda:
            x = torch.stack(
                [torch.from_numpy(fi).cuda() for fi in framed_imgs], 0)
        else:
            x = torch.stack([torch.from_numpy(fi) for fi in framed_imgs], 0)

        x = x.to(torch.float32 if not use_float16 else torch.float16).permute(
            0, 3, 1, 2)

        model = EfficientDetBackbone(compound_coef=7,
                                     num_classes=len(obj_list),
                                     ratios=anchor_ratios,
                                     scales=anchor_scales)
        model.load_state_dict(
            torch.load(f'weights/efficientdet-d7/efficientdet-d7.pth')
        )  #place weight path here
        model.requires_grad_(False)
        model.eval()

        if use_cuda:
            model = model.cuda()
        if use_float16:
            model = model.half()

        with torch.no_grad():
            features, regression, classification, anchors = model(x)

            regressBoxes = BBoxTransform()
            clipBoxes = ClipBoxes()

            out = postprocess(x, anchors, regression, classification,
                              regressBoxes, clipBoxes, threshold,
                              iou_threshold)

        out = invert_affine(framed_metas, out)
        display(filename, out, ori_imgs, imshow=False, imwrite=True)

        print('running speed test...')
        with torch.no_grad():
            print('test1: model inferring and postprocessing')
            print('inferring image for 10 times...')
            t1 = time.time()
            for _ in range(10):
                _, regression, classification, anchors = model(x)

                out = postprocess(x, anchors, regression, classification,
                                  regressBoxes, clipBoxes, threshold,
                                  iou_threshold)
                out = invert_affine(framed_metas, out)

            t2 = time.time()
            tact_time = (t2 - t1) / 10
            print(f'{tact_time} seconds, {1 / tact_time} FPS, @batch_size 1')
Exemplo n.º 6
0
def test(threshold=0.2):
    with open("datasets/vcoco/new_prior_mask.pkl", "rb") as file:
        prior_mask = pickle.load(file, encoding="bytes")

    model = EfficientDetBackbone(num_classes=len(eval(params["obj_list"])),
                                 num_union_classes=25,
                                 num_inst_classes=51,
                                 compound_coef=args.compound_coef,
                                 ratios=eval(params["anchors_ratios"]),
                                 scales=eval(params["anchors_scales"]))
    model.load_state_dict(
        torch.load(weights_path, map_location=torch.device('cpu')))
    model.requires_grad_(False)
    model.eval()

    if args.cuda:
        model = model.cuda()
    if args.float16:
        model = model.half()

    regressBoxes = BBoxTransform()
    clipBoxes = ClipBoxes()

    img_dir = os.path.join(data_dir, "vcoco/coco/images/%s" % "val2014")

    with open(os.path.join(data_dir, 'vcoco/data/splits/vcoco_test.ids'),
              'r') as f:
        image_ids = f.readlines()
    image_ids = [int(id) for id in image_ids]

    _t = {'im_detect': Timer(), 'misc': Timer()}
    detection = []

    for i, image_id in enumerate(image_ids):

        _t['im_detect'].tic()

        file = "COCO_val2014_" + (str(image_id)).zfill(12) + '.jpg'

        img_detection = img_detect(file,
                                   img_dir,
                                   model,
                                   input_size,
                                   regressBoxes,
                                   clipBoxes,
                                   prior_mask,
                                   threshold=threshold)
        detection.extend(img_detection)
        if need_visual:
            visual(img_detection, image_id)
        _t['im_detect'].toc()

        print('im_detect: {:d}/{:d}, average time: {:.3f}s'.format(
            i + 1, len(image_ids), _t['im_detect'].average_time))

    with open(detection_path, "wb") as file:
        pickle.dump(detection, file)
Exemplo n.º 7
0
def test(threshold=0.2):
    model = EfficientDetBackbone(num_classes=num_objects,
                                 num_union_classes=num_union_actions,
                                 num_inst_classes=num_inst_actions,
                                 compound_coef=args.compound_coef,
                                 ratios=eval(params["anchors_ratios"]),
                                 scales=eval(params["anchors_scales"]))
    model.load_state_dict(
        torch.load(weights_path, map_location=torch.device('cpu')))
    model.requires_grad_(False)
    model.eval()

    if args.cuda:
        model = model.cuda()
    if args.float16:
        model = model.half()

    regressBoxes = BBoxTransform()
    clipBoxes = ClipBoxes()

    img_dir = os.path.join(data_dir,
                           "hico_20160224_det/images/%s" % "test2015")

    _t = {'im_detect': Timer(), 'misc': Timer()}
    detection = {}

    count = 0
    for line in glob.iglob(img_dir + '/' + '*.jpg'):
        count += 1

        _t['im_detect'].tic()
        image_id = int(line[-9:-4])

        file = "HICO_test2015_" + (str(image_id)).zfill(8) + ".jpg"

        # if file != "COCO_val2014_000000001987.jpg":
        #     continue

        dets = img_detect(file,
                          img_dir,
                          model,
                          input_size,
                          regressBoxes,
                          clipBoxes,
                          threshold=threshold)

        detection[image_id] = dets
        # detection.extend(img_detection)
        _t['im_detect'].toc()

        print('im_detect: {:d}/{:d}, average time: {:.3f}s'.format(
            count, 9658, _t['im_detect'].average_time))

    with open(detection_path, "wb") as file:
        pickle.dump(detection, file)
def test(opt):
    params = Params(f'projects/{opt.project}.yml')
    project_name = params.project_name
    obj_list = params.obj_list
    compound_coef = opt.compound_coef
    force_input_size = None  # set None to use default size
    img_dir = opt.img_dir
    model_path = opt.model_path

    use_cuda = True
    use_float16 = False
    cudnn.fastest = True
    cudnn.benchmark = True

    input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536]
    input_size = input_sizes[compound_coef] if force_input_size is None else force_input_size

    model = EfficientDetBackbone(compound_coef=compound_coef, num_classes=len(obj_list))
    model.load_state_dict(torch.load(model_path))
    model.eval()

    if use_cuda:
        model = model.cuda()
    if use_float16:
        model = model.half()

    gt = COCO(opt.ann_file)
    gt_lst = load_coco_bboxes(gt, is_gt=True)

    imgs = glob.glob(os.path.join(img_dir, '*.jpg'))
    det_lst = []
    progressbar = tqdm(imgs)
    for i, img in enumerate(progressbar):
        det = single_img_test(img, input_size, model, use_cuda, use_float16)
        det_lst.extend(det)
        progressbar.update()
        progressbar.set_description('Step: {}/{}'.format(i, len(imgs)))


    evaluator = Evaluator()
    ret, mAP = evaluator.GetMAPbyClass(
        gt_lst,
        det_lst,
        method='EveryPointInterpolation'
    )
    # Get metric values per each class
    for metricsPerClass in ret:
        cl = metricsPerClass['class']
        ap = metricsPerClass['AP']
        ap_str = '{0:.3f}'.format(ap)
        print('AP: %s (%s)' % (ap_str, cl))
    mAP_str = '{0:.3f}'.format(mAP)
    print('mAP: %s\n' % mAP_str)
Exemplo n.º 9
0
def load_model(compound_coef, obj_list, params, weights_path, use_cuda,
               use_float16):
    model = EfficientDetBackbone(compound_coef=compound_coef,
                                 num_classes=len(obj_list),
                                 ratios=eval(params['anchors_ratios']),
                                 scales=eval(params['anchors_scales']))
    model.load_state_dict(
        torch.load(weights_path, map_location=torch.device('cpu')))
    model.requires_grad_(False)
    model.eval()

    if use_cuda:
        model.cuda(gpu)

        if use_float16:
            model.half()
    return model
def main():
    anchor_ratios = [(1.0, 1.0), (1.4, 0.7), (0.7, 1.4)]
    anchor_scales = [2**0, 2**(1.0 / 3.0), 2**(2.0 / 3.0)]
    input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536]

    models = []
    for cp_coef in range(8):
        model = EfficientDetBackbone(compound_coef=cp_coef,
                                     num_classes=90,
                                     ratios=anchor_ratios,
                                     scales=anchor_scales)
        model.load_state_dict(
            torch.load(f'weights/efficientdet-d{cp_coef}.pth',
                       map_location='cpu'))
        model.requires_grad_(False)
        model.eval()
        model = model.cuda()
        models.append(model)

    while True:
        time.sleep(1)
Exemplo n.º 11
0
def load_model(weights_path: Union[str, os.PathLike],
               p_cfg_path: Union[str, os.PathLike],
               compound_coef: float) -> EfficientDetBackbone :
    """Loads and return model with given weights and project config.

    Args:
        weights_path (Union[str, os.PathLike]): Path to model weights.
        p_cfg_path (Union[str, os.PathLike]): Path to Project config yaml file.
        compound_coef (float): Compund scaling coefficient.

    Returns:
        EfficientDetBackbone: EfficientDet model
    """      
    params = yaml.safe_load(open(p_cfg_path))
    obj_list = params['obj_list']
    model = EfficientDetBackbone(compound_coef=compound_coef, num_classes=len(obj_list),
                                     ratios=eval(params['anchors_ratios']), scales=eval(params['anchors_scales']))
    model.load_state_dict(torch.load(weights_path, map_location=DEVICE))
    if USE_CUDA:
        model.cuda()
    model.requires_grad_(False)
    model.eval()
    return model
def model_fn(model_dir):
    # based entirely off of
    # https://github.com/zylo117/Yet-Another-EfficientDet-Pytorch/blob/master/coco_eval.py
    print(f'building and loading efficientdet d{EFFICIENTDET_COMPOUND_COEF}')
    model = EfficientDetBackbone(compound_coef=EFFICIENTDET_COMPOUND_COEF,
                                 num_classes=len(PARAMS['obj_list']),
                                 ratios=eval(PARAMS['anchors_ratios']),
                                 scales=eval(PARAMS['anchors_scales']))
    state_dict = torch.hub.load_state_dict_from_url(
        url=get_weights_url(c=EFFICIENTDET_COMPOUND_COEF),
        model_dir=model_dir,
        map_location=torch.device('cpu'))
    model.load_state_dict(state_dict)
    model.requires_grad_(False)
    model.eval()

    if USE_CUDA:
        model.cuda(0)

    if USE_FLOAT16:
        model.half()

    return model
Exemplo n.º 13
0
def main():
    anchor_ratios = [(1.0, 1.0), (1.4, 0.7), (0.7, 1.4)]
    anchor_scales = [2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)]
    input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536]

    for cp_coef in range(8):
        print()
        print('Model: d' + str(cp_coef), '>>>')

        total_time = 0
        for i in range(100):
            if i % 10 == 0:
                start = time.perf_counter()
                model = EfficientDetBackbone(compound_coef=cp_coef, num_classes=90, ratios=anchor_ratios, scales=anchor_scales)
                model.load_state_dict(torch.load(f'weights/efficientdet-d{cp_coef}.pth', map_location='cpu'))
                model.requires_grad_(False)
                model.eval()
                model = model.cuda()
                total_time += time.perf_counter() - start
            else:
                model = None

        print('Loading(s): {:.2f}'.format(total_time / 10))
Exemplo n.º 14
0
def model_init(args):
    compound_coef = args.compound_coef
    checkpoint = args.checkpoint
    use_cuda = not args.cpu
    cudnn.fastest = True
    cudnn.benchmark = True

    # replace this part with your project's anchor config
    anchor_ratios = [(1.0, 1.0), (1.4, 0.7), (0.7, 1.4)]
    anchor_scales = [2**0, 2**(1.0 / 3.0), 2**(2.0 / 3.0)]

    # tf bilinear interpolation is different from any other's, just make do
    model = EfficientDetBackbone(compound_coef=compound_coef,
                                 num_classes=90,
                                 ratios=anchor_ratios,
                                 scales=anchor_scales)
    model.load_state_dict(torch.load(checkpoint, map_location='cpu'))
    model.requires_grad_(False)
    model.eval()
    if use_cuda:
        model = model.cuda()

    return model
def eval(pretrained_weights: Path, inputs_splitted_into_lists: list,
         compound_coef: int, use_cuda: bool) -> list:
    threshold = 0.2
    iou_threshold = 0.2
    # replace this part with your project's anchor config
    anchor_ratios = [(1.0, 1.0), (1.4, 0.7), (0.7, 1.4)]
    anchor_scales = [2**0, 2**(1.0 / 3.0), 2**(2.0 / 3.0)]

    model = EfficientDetBackbone(compound_coef=compound_coef,
                                 num_classes=1,
                                 ratios=anchor_ratios,
                                 scales=anchor_scales)
    model.load_state_dict(torch.load(pretrained_weights, map_location='cpu'))
    model.requires_grad_(False)
    model.eval()

    if use_cuda:
        model = model.cuda()
    if use_float16:
        model = model.half()

    predictions = []

    for inputs_split in inputs_splitted_into_lists:
        with torch.no_grad():
            features, regression, classification, anchors = model(inputs_split)

            regressBoxes = BBoxTransform()
            clipBoxes = ClipBoxes()

            out = postprocess(inputs_split, anchors, regression,
                              classification, regressBoxes, clipBoxes,
                              threshold, iou_threshold)

            predictions += out

    return predictions
Exemplo n.º 16
0
def load_apex_model(compound_coef, obj_list, params, weights_path):
    opt_level = 'O1'

    model = EfficientDetBackbone(compound_coef=compound_coef,
                                 num_classes=len(obj_list),
                                 ratios=eval(params['anchors_ratios']),
                                 scales=eval(params['anchors_scales']))
    checkpoint = torch.load(weights_path)

    model = model.cuda(gpu)
    optimizer = torch.optim.AdamW(model.parameters(), lr)
    model, optimizer = amp.initialize(model, optimizer, opt_level=opt_level)
    model.load_state_dict(checkpoint['model'])
    optimizer.load_state_dict(checkpoint['optimizer'])
    amp.load_state_dict(checkpoint['amp'])

    model.requires_grad_(False)
    model.cuda(gpu)
    model = model.eval()

    return model
Exemplo n.º 17
0
def train(opt):
    params = Params(f'projects/{opt.project}.yml')

    if params.num_gpus == 0:
        os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

    if torch.cuda.is_available():
        torch.cuda.manual_seed(42)
    else:
        torch.manual_seed(42)

    opt.saved_path = opt.saved_path + f'/{params.project_name}/'
    opt.log_path = opt.log_path + f'/{params.project_name}/tensorboard/'
    os.makedirs(opt.log_path, exist_ok=True)
    os.makedirs(opt.saved_path, exist_ok=True)

    input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536, 1536]
    training_set = CocoDataset(root_dir=os.path.join(opt.data_path,
                                                     params.project_name),
                               set=params.train_set,
                               phase='train',
                               transforms=get_train_transforms())

    val_set = CocoDataset(root_dir=os.path.join(opt.data_path,
                                                params.project_name),
                          set=params.val_set,
                          phase='val',
                          transforms=get_valid_transforms())
    training_generator = torch.utils.data.DataLoader(
        training_set,
        batch_size=opt.batch_size,
        sampler=RandomSampler(training_set),
        pin_memory=False,
        drop_last=True,
        num_workers=opt.num_workers,
        collate_fn=collate_fn,
    )
    val_generator = torch.utils.data.DataLoader(
        val_set,
        batch_size=opt.batch_size,
        num_workers=opt.num_workers,
        shuffle=False,
        sampler=SequentialSampler(val_set),
        pin_memory=False,
        collate_fn=collate_fn,
    )

    model = EfficientDetBackbone(num_classes=len(params.obj_list),
                                 compound_coef=opt.compound_coef,
                                 ratios=eval(params.anchors_ratios),
                                 scales=eval(params.anchors_scales))

    # load last weights
    if opt.load_weights is not None:
        if opt.load_weights.endswith('.pth'):
            weights_path = opt.load_weights
        else:
            weights_path = get_last_weights(opt.saved_path)
        try:
            last_step = int(
                os.path.basename(weights_path).split('_')[-1].split('.')[0])
        except:
            last_step = 0

        try:
            ret = model.load_state_dict(torch.load(weights_path), strict=False)
        except RuntimeError as e:
            print(f'[Warning] Ignoring {e}')
            print(
                '[Warning] Don\'t panic if you see this, this might be because you load a pretrained weights with different number of classes. The rest of the weights should be loaded already.'
            )

        print(
            f'[Info] loaded weights: {os.path.basename(weights_path)}, resuming checkpoint from step: {last_step}'
        )
    else:
        last_step = 0
        print('[Info] initializing weights...')
        init_weights(model)

    # freeze backbone if train head_only
    if opt.head_only:

        def freeze_backbone(m):
            classname = m.__class__.__name__
            for ntl in ['EfficientNet', 'BiFPN']:
                if ntl in classname:
                    for param in m.parameters():
                        param.requires_grad = False

        model.apply(freeze_backbone)
        print('[Info] freezed backbone')

    # https://github.com/vacancy/Synchronized-BatchNorm-PyTorch
    # apply sync_bn when using multiple gpu and batch_size per gpu is lower than 4
    #  useful when gpu memory is limited.
    # because when bn is disable, the training will be very unstable or slow to converge,
    # apply sync_bn can solve it,
    # by packing all mini-batch across all gpus as one batch and normalize, then send it back to all gpus.
    # but it would also slow down the training by a little bit.
    if params.num_gpus > 1 and opt.batch_size // params.num_gpus < 4:
        model.apply(replace_w_sync_bn)
        use_sync_bn = True
    else:
        use_sync_bn = False

    writer = SummaryWriter(
        opt.log_path +
        f'/{datetime.datetime.now().strftime("%Y%m%d-%H%M%S")}/')

    # warp the model with loss function, to reduce the memory usage on gpu0 and speedup
    model = ModelWithLoss(model, debug=opt.debug)

    if params.num_gpus > 0:
        model = model.cuda()
        if params.num_gpus > 1:
            model = CustomDataParallel(model, params.num_gpus)
            if use_sync_bn:
                patch_replication_callback(model)

    if opt.optim == 'adamw':
        optimizer = torch.optim.AdamW(model.parameters(), opt.lr)
    else:
        optimizer = torch.optim.SGD(model.parameters(),
                                    opt.lr,
                                    momentum=0.9,
                                    nesterov=True)

    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           patience=3,
                                                           verbose=True)

    epoch = 0
    best_loss = 1e5
    best_epoch = 0
    accumulation_steps = 32
    step = max(0, last_step)
    model.train()

    num_iter_per_epoch = len(training_generator)

    try:
        for epoch in range(opt.num_epochs):
            last_epoch = step // num_iter_per_epoch
            if epoch < last_epoch:
                continue

            epoch_loss = []
            progress_bar = tqdm(training_generator)
            for iter, (imgs, annots) in enumerate(progress_bar):
                pass
                if iter < step - last_epoch * num_iter_per_epoch:
                    progress_bar.update()
                    continue
                try:
                    imgs = torch.stack(imgs)
                    annot = pad_annots(annots)

                    if params.num_gpus == 1:
                        # if only one gpu, just send it to cuda:0
                        # elif multiple gpus, send it to multiple gpus in CustomDataParallel, not here
                        imgs = imgs.cuda()
                        annot = annot.cuda()
                    # print(annot)

                    # optimizer.zero_grad()
                    cls_loss, reg_loss = model(imgs,
                                               annot,
                                               obj_list=params.obj_list)
                    cls_loss = cls_loss.mean()
                    reg_loss = reg_loss.mean()

                    loss = cls_loss + reg_loss
                    if loss == 0 or not torch.isfinite(loss):
                        continue

                    loss.backward()
                    # torch.nn.utils.clip_grad_norm_(model.parameters(), 0.1)
                    if (iter + 1) % (accumulation_steps //
                                     opt.batch_size) == 0:
                        # print('step')
                        optimizer.step()
                        optimizer.zero_grad()
                    # optimizer.step()

                    epoch_loss.append(float(loss))

                    progress_bar.set_description(
                        'Step: {}. Epoch: {}/{}. Iteration: {}/{}. Cls loss: {:.5f}. Reg loss: {:.5f}. Total loss: {:.5f}'
                        .format(step, epoch, opt.num_epochs, iter + 1,
                                num_iter_per_epoch, cls_loss.item(),
                                reg_loss.item(), loss.item()))
                    writer.add_scalars('Loss', {'train': loss}, step)
                    writer.add_scalars('Regression_loss', {'train': reg_loss},
                                       step)
                    writer.add_scalars('Classfication_loss',
                                       {'train': cls_loss}, step)

                    # log learning_rate
                    current_lr = optimizer.param_groups[0]['lr']
                    writer.add_scalar('learning_rate', current_lr, step)

                    step += 1

                    if step % opt.save_interval == 0 and step > 0:
                        save_checkpoint(
                            model,
                            f'efficientdet-d{opt.compound_coef}_{epoch}_{step}.pth'
                        )
                        print('checkpoint...')

                except Exception as e:
                    print('[Error]', traceback.format_exc())
                    print(e)
                    continue
            scheduler.step(np.mean(epoch_loss))

            if epoch % opt.val_interval == 0:
                model.eval()
                loss_regression_ls = []
                loss_classification_ls = []
                for iter, (imgs, annots) in enumerate(val_generator):
                    with torch.no_grad():
                        imgs = torch.stack(imgs)
                        annot = pad_annots(annots)

                        if params.num_gpus == 1:
                            imgs = imgs.cuda()
                            annot = annot.cuda()

                        cls_loss, reg_loss = model(imgs,
                                                   annot,
                                                   obj_list=params.obj_list)
                        cls_loss = cls_loss.mean()
                        reg_loss = reg_loss.mean()

                        loss = cls_loss + reg_loss
                        if loss == 0 or not torch.isfinite(loss):
                            continue

                        loss_classification_ls.append(cls_loss.item())
                        loss_regression_ls.append(reg_loss.item())

                cls_loss = np.mean(loss_classification_ls)
                reg_loss = np.mean(loss_regression_ls)
                loss = cls_loss + reg_loss

                print(
                    'Val. Epoch: {}/{}. Classification loss: {:1.5f}. Regression loss: {:1.5f}. Total loss: {:1.5f}'
                    .format(epoch, opt.num_epochs, cls_loss, reg_loss, loss))
                writer.add_scalars('Loss', {'val': loss}, step)
                writer.add_scalars('Regression_loss', {'val': reg_loss}, step)
                writer.add_scalars('Classfication_loss', {'val': cls_loss},
                                   step)

                if loss + opt.es_min_delta < best_loss:
                    best_loss = loss
                    best_epoch = epoch

                    save_checkpoint(
                        model,
                        f'efficientdet-d{opt.compound_coef}_{epoch}_{step}.pth'
                    )

                model.train()

                # Early stopping
                if epoch - best_epoch > opt.es_patience > 0:
                    print(
                        '[Info] Stop training at epoch {}. The lowest loss achieved is {}'
                        .format(epoch, best_loss))
                    break
    except KeyboardInterrupt:
        save_checkpoint(
            model, f'efficientdet-d{opt.compound_coef}_{epoch}_{step}.pth')
        writer.close()
    writer.close()
Exemplo n.º 18
0
def train(opt):
    params = Params(f'projects/{opt.project}_crop.yml')

    if params.num_gpus == 0:
        os.environ['CUDA_VISIBLE_DEVICES'] = '1-'

    if torch.cuda.is_available():
        torch.cuda.manual_seed(42)
    else:
        torch.manual_seed(42)

    save_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    opt.saved_path = opt.saved_path + f'/{params.project_name}/crop/weights/{save_time}'
    opt.log_path = opt.log_path + f'/{params.project_name}/crop/tensorboard/'
    os.makedirs(opt.log_path, exist_ok=True)
    os.makedirs(opt.saved_path, exist_ok=True)
    print('save_path :', opt.saved_path)
    print('log_path :', opt.log_path)

    training_params = {
        'batch_size': opt.batch_size,
        'shuffle': True,
        'drop_last': True,
        'collate_fn': collater,
        'num_workers': opt.num_workers
    }

    val_params = {
        'batch_size': opt.batch_size,
        'shuffle': False,
        'drop_last': True,
        'collate_fn': collater,
        'num_workers': opt.num_workers
    }

    input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536, 1536]
    training_set = Project42Dataset(root_dir=os.path.join(
        opt.data_path, params.project_name, 'crop'),
                                    set=params.train_set,
                                    params=params,
                                    transform=transforms.Compose([
                                        Normalizer(mean=params.mean,
                                                   std=params.std),
                                        Augmenter(),
                                        Resizer(input_sizes[opt.compound_coef])
                                    ]))
    training_generator = DataLoader(training_set, **training_params)

    val_set = Project42Dataset(root_dir=os.path.join(opt.data_path,
                                                     params.project_name,
                                                     'crop'),
                               set=params.val_set,
                               params=params,
                               transform=transforms.Compose([
                                   Normalizer(mean=params.mean,
                                              std=params.std),
                                   Resizer(input_sizes[opt.compound_coef])
                               ]))
    val_generator = DataLoader(val_set, **val_params)

    # labels
    labels = training_set.labels
    print('label:', labels)

    model = EfficientDetBackbone(num_classes=len(params.obj_list),
                                 compound_coef=opt.compound_coef,
                                 ratios=eval(params.anchors_ratios),
                                 scales=eval(params.anchors_scales))

    # load last weights
    if opt.load_weights is not None:
        if opt.load_weights.endswith('.pth'):
            weights_path = opt.load_weights
        else:
            weights_path = get_last_weights(opt.saved_path)
        try:
            last_step = int(
                os.path.basename(weights_path).split('_')[-1].split('.')[0])
        except:
            last_step = 0

        try:
            ret = model.load_state_dict(torch.load(weights_path), strict=False)
        except RuntimeError as e:
            print(f'[Warning] Ignoring {e}')
            print(
                '[Warning] Don\'t panic if you see this, this might be because you load a pretrained weights with different number of classes. The rest of the weights should be loaded already.'
            )

        print(
            f'[Info] loaded weights: {os.path.basename(weights_path)}, resuming checkpoint from step: {last_step}'
        )
    else:
        last_step = 0
        print('[Info] initializing weights...')
        init_weights(model)

    # freeze backbone if train head_only
    if opt.head_only:

        def freeze_backbone(m):
            classname = m.__class__.__name__
            for ntl in ['EfficientNet', 'BiFPN']:
                if ntl in classname:
                    for param in m.parameters():
                        param.requires_grad = False

        model.apply(freeze_backbone)
        print('[Info] freezed backbone')

    # https://github.com/vacancy/Synchronized-BatchNorm-PyTorch
    # apply sync_bn when using multiple gpu and batch_size per gpu is lower than 4
    #  useful when gpu memory is limited.
    # because when bn is disable, the training will be very unstable or slow to converge,
    # apply sync_bn can solve it,
    # by packing all mini-batch across all gpus as one batch and normalize, then send it back to all gpus.
    # but it would also slow down the training by a little bit.
    if params.num_gpus > 1 and opt.batch_size // params.num_gpus < 4:
        model.apply(replace_w_sync_bn)
        use_sync_bn = True
    else:
        use_sync_bn = False

    writer = SummaryWriter(opt.log_path + f'/{save_time}/')

    # warp the model with loss function, to reduce the memory usage on gpu0 and speedup
    model = ModelWithLoss(model, debug=opt.debug)

    if params.num_gpus > 0:
        model = model.cuda()
        if params.num_gpus > 1:
            model = CustomDataParallel(model, params.num_gpus)
            if use_sync_bn:
                patch_replication_callback(model)

    if opt.optim == 'adamw':
        optimizer = torch.optim.AdamW(model.parameters(), opt.lr)
    else:
        optimizer = torch.optim.SGD(model.parameters(),
                                    opt.lr,
                                    momentum=0.9,
                                    nesterov=True)

    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           patience=3,
                                                           verbose=True)

    epoch = 0
    best_loss = 1e5
    best_epoch = 0
    step = max(0, last_step)
    model.train()

    num_iter_per_epoch = len(training_generator)

    try:
        for epoch in range(opt.num_epochs):
            last_epoch = step // num_iter_per_epoch
            if epoch < last_epoch:
                continue

            epoch_loss = []
            progress_bar = tqdm(training_generator)
            for iter, data in enumerate(progress_bar):
                if iter < step - last_epoch * num_iter_per_epoch:
                    progress_bar.update()
                    continue
                try:
                    imgs = data['img']
                    annot = data['annot']

                    ## train image show
                    # for idx in range(len(imgs)):
                    #     showshow = imgs[idx].numpy()
                    #     print(showshow.shape)
                    #     showshow = showshow.transpose(1, 2, 0)
                    #     a = annot[idx].numpy().reshape(5, )
                    #     img_show = cv2.rectangle(showshow, (a[0],a[1]), (a[2],a[3]), (0, 0, 0), 3)
                    #     cv2.imshow(f'{idx}_{params.obj_list[int(a[4])]}', img_show)
                    #     cv2.waitKey(1000)
                    #     cv2.destroyAllWindows()

                    if params.num_gpus == 1:
                        # if only one gpu, just send it to cuda:0
                        # elif multiple gpus, send it to multiple gpus in CustomDataParallel, not here
                        imgs = imgs.cuda()
                        annot = annot.cuda()

                    optimizer.zero_grad()
                    cls_loss, reg_loss, regression, classification, anchors = model(
                        imgs, annot, obj_list=params.obj_list)

                    cls_loss = cls_loss.mean()
                    reg_loss = reg_loss.mean()

                    loss = cls_loss + reg_loss
                    if loss == 0 or not torch.isfinite(loss):
                        continue

                    loss.backward()
                    # torch.nn.utils.clip_grad_norm_(model.parameters(), 0.1)
                    optimizer.step()

                    # loss
                    epoch_loss.append(float(loss))

                    # mAP
                    threshold = 0.2
                    iou_threshold = 0.2

                    regressBoxes = BBoxTransform()
                    clipBoxes = ClipBoxes()

                    out = postprocess(imgs, anchors, regression,
                                      classification, regressBoxes, clipBoxes,
                                      threshold, iou_threshold)

                    mAP = mAP_score(annot, out, labels)
                    mAP = mAP.results['mAP']

                    progress_bar.set_description(
                        'Step: {}. Epoch: {}/{}. Iteration: {}/{}. Cls loss: {:.5f}. Reg loss: {:.5f}. Total loss: {:.5f}. mAP: {:.2f}'
                        .format(step, epoch + 1, opt.num_epochs, iter + 1,
                                num_iter_per_epoch, cls_loss.item(),
                                reg_loss.item(), loss.item(), mAP))

                    writer.add_scalars('Loss', {'train': loss}, step)
                    writer.add_scalars('Regression_loss', {'train': reg_loss},
                                       step)
                    writer.add_scalars('Classfication_loss',
                                       {'train': cls_loss}, step)
                    writer.add_scalars('mAP', {'train': mAP}, step)

                    # log learning_rate
                    current_lr = optimizer.param_groups[0]['lr']
                    writer.add_scalar('learning_rate', current_lr, step)

                    step += 1

                    if step % opt.save_interval == 0 and step > 0:
                        save_checkpoint(
                            model,
                            f'efficientdet-d{opt.compound_coef}_{epoch}.pth')
                        print('checkpoint...')

                except Exception as e:
                    print('[Error]', traceback.format_exc())
                    print(e)
                    continue
            scheduler.step(np.mean(epoch_loss))

            if epoch % opt.val_interval == 0:
                model.eval()
                loss_regression_ls = []
                loss_classification_ls = []

                for iter, data in enumerate(val_generator):
                    with torch.no_grad():
                        imgs = data['img']
                        annot = data['annot']

                        if params.num_gpus == 1:
                            imgs = imgs.cuda()
                            annot = annot.cuda()

                        cls_loss, reg_loss, regression, classification, anchors = model(
                            imgs, annot, obj_list=params.obj_list)
                        cls_loss = cls_loss.mean()
                        reg_loss = reg_loss.mean()

                        loss = cls_loss + reg_loss
                        if loss == 0 or not torch.isfinite(loss):
                            continue

                        loss_classification_ls.append(cls_loss.item())
                        loss_regression_ls.append(reg_loss.item())

                cls_loss = np.mean(loss_classification_ls)
                reg_loss = np.mean(loss_regression_ls)
                loss = cls_loss + reg_loss

                # mAP
                threshold = 0.2
                iou_threshold = 0.2

                regressBoxes = BBoxTransform()
                clipBoxes = ClipBoxes()

                out = postprocess(imgs, anchors, regression, classification,
                                  regressBoxes, clipBoxes, threshold,
                                  iou_threshold)

                mAP = mAP_score(annot, out, labels)
                mAP = mAP.results['mAP']

                print(
                    'Val. Epoch: {}/{}. Classification loss: {:1.5f}. Regression loss: {:1.5f}. Total loss: {:1.5f}. mAP: {:.2f}'
                    .format(epoch + 1, opt.num_epochs, cls_loss, reg_loss,
                            loss, mAP))
                writer.add_scalars('Loss', {'val': loss}, step)
                writer.add_scalars('Regression_loss', {'val': reg_loss}, step)
                writer.add_scalars('Classfication_loss', {'val': cls_loss},
                                   step)
                writer.add_scalars('mAP', {'val': mAP}, step)

                if loss + opt.es_min_delta < best_loss:
                    best_loss = loss
                    best_epoch = epoch

                    save_checkpoint(
                        model,
                        f'efficientdet-d{opt.compound_coef}_{epoch}_{step}.pth'
                    )

                model.train()

                # Early stopping
                if epoch - best_epoch > opt.es_patience > 0:
                    print(
                        '[Info] Stop training at epoch {}. The lowest loss achieved is {}'
                        .format(epoch, best_loss))
                    break
    except KeyboardInterrupt:
        save_checkpoint(
            model, f'efficientdet-d{opt.compound_coef}_{epoch}_{step}.pth')
        writer.close()
    writer.close()
Exemplo n.º 19
0
def train(opt):
    params = Params(f'projects/{opt.project}.yml')

    if params.num_gpus == 0:
        os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

    if torch.cuda.is_available():
        torch.cuda.manual_seed(42)
    else:
        torch.manual_seed(42)

    opt.saved_path = opt.saved_path + f'/{params.project_name}/'
    opt.log_path = opt.log_path + f'/{params.project_name}/tensorboard/'
    os.makedirs(opt.log_path, exist_ok=True)
    os.makedirs(opt.saved_path, exist_ok=True)

    training_params = {
        'batch_size': opt.batch_size,
        'shuffle': True,
        'drop_last': True,
        'collate_fn': collater,
        'num_workers': opt.num_workers
    }

    val_params = {
        'batch_size': opt.batch_size,
        'shuffle': False,
        'drop_last': True,
        'collate_fn': collater,
        'num_workers': opt.num_workers
    }

    input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536]
    training_set = CocoDataset(root_dir=opt.data_path + params.project_name,
                               set=params.train_set,
                               transform=transforms.Compose([
                                   Normalizer(mean=params.mean,
                                              std=params.std),
                                   Augmenter(),
                                   Resizer(input_sizes[opt.compound_coef])
                               ]))
    training_generator = DataLoader(training_set, **training_params)

    val_set = CocoDataset(root_dir=opt.data_path + params.project_name,
                          set=params.val_set,
                          transform=transforms.Compose([
                              Normalizer(mean=params.mean, std=params.std),
                              Resizer(input_sizes[opt.compound_coef])
                          ]))
    val_generator = DataLoader(val_set, **val_params)

    model = EfficientDetBackbone(num_anchors=9,
                                 num_classes=len(params.obj_list),
                                 compound_coef=opt.compound_coef)

    # load last weights
    if opt.load_weights is not None:
        if opt.load_weights.endswith('.pth'):
            weights_path = opt.load_weights
        else:
            weights_path = get_last_weights(opt.saved_path)
        try:
            last_step = int(
                os.path.basename(weights_path).split('_')[-1].split('.')[0])
        except:
            last_step = 0
        model.load_state_dict(torch.load(weights_path))
        print(
            f'loaded weights: {os.path.basename(weights_path)}, resuming checkpoint from step: {last_step}'
        )
    else:
        last_step = 0
        print('initializing weights...')
        init_weights(model)

    # freeze backbone if train head_only
    if opt.head_only:

        def freeze_backbone(m):
            classname = m.__class__.__name__
            for ntl in ['EfficientNet', 'BiFPN']:
                if ntl in classname:
                    for param in m.parameters():
                        param.requires_grad = False

        model.apply(freeze_backbone)
        print('freezed backbone')

    # https://github.com/vacancy/Synchronized-BatchNorm-PyTorch
    # apply sync_bn when using multiple gpu and batch_size per gpu is lower than 4
    #  useful when gpu memory is limited.
    # because when bn is disable, the training will be very unstable or slow to converge,
    # apply sync_bn can solve it,
    # by packing all mini-batch across all gpus as one batch and normalize, then send it back to all gpus.
    # but it would also slow down the training by a little bit.
    if params.num_gpus > 1 and opt.batch_size // params.num_gpus < 4:
        model.apply(replace_w_sync_bn)

    writer = SummaryWriter(
        opt.log_path +
        f'/{datetime.datetime.now().strftime("%Y%m%d-%H%M%S")}/')

    if params.num_gpus > 0:
        model = model.cuda()
        model = CustomDataParallel(model, params.num_gpus)

    optimizer = torch.optim.AdamW(model.parameters(), opt.lr)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           patience=3,
                                                           verbose=True)

    criterion = FocalLoss()

    best_loss = 1e5
    best_epoch = 0
    step = max(0, last_step)
    model.train()

    num_iter_per_epoch = len(training_generator)
    for epoch in range(opt.num_epochs):
        try:
            model.train()
            epoch_loss = []
            progress_bar = tqdm(training_generator)
            for iter, data in enumerate(progress_bar):
                try:
                    imgs = data['img']
                    annot = data['annot']

                    if params.num_gpus > 0:
                        annot = annot.cuda()

                    optimizer.zero_grad()
                    _, regression, classification, anchors = model(imgs)

                    cls_loss, reg_loss = criterion(
                        classification,
                        regression,
                        anchors,
                        annot,
                        # imgs=imgs, obj_list=params.obj_list  # uncomment this to debug
                    )

                    loss = cls_loss + reg_loss
                    if loss == 0 or not torch.isfinite(loss):
                        continue

                    loss.backward()
                    # torch.nn.utils.clip_grad_norm_(model.parameters(), 0.1)
                    optimizer.step()

                    epoch_loss.append(float(loss))

                    progress_bar.set_description(
                        'Step: {}. Epoch: {}/{}. Iteration: {}/{}. Cls loss: {:.5f}. Reg loss: {:.5f}. Total loss: {:.5f}'
                        .format(step, epoch + 1, opt.num_epochs, iter + 1,
                                num_iter_per_epoch, cls_loss.item(),
                                reg_loss.item(), loss.item()))
                    writer.add_scalars('Loss', {'train': loss}, step)
                    writer.add_scalars('Regression_loss', {'train': reg_loss},
                                       step)
                    writer.add_scalars('Classfication_loss',
                                       {'train': cls_loss}, step)

                    # log learning_rate
                    current_lr = optimizer.param_groups[0]['lr']
                    writer.add_scalar('learning_rate', current_lr, step)

                    step += 1

                except Exception as e:
                    print(traceback.format_exc())
                    print(e)
                    continue
            scheduler.step(np.mean(epoch_loss))

            if step % opt.save_interval == 0 and step > 0:
                save_checkpoint(
                    model,
                    f'efficientdet-d{opt.compound_coef}_{epoch}_{step}.pth')

            if epoch % opt.val_interval == 0:
                model.eval()
                loss_regression_ls = []
                loss_classification_ls = []
                for iter, data in enumerate(val_generator):
                    with torch.no_grad():
                        imgs = data['img']
                        annot = data['annot']

                        if params.num_gpus > 0:
                            annot = annot.cuda()
                        _, regression, classification, anchors = model(imgs)
                        cls_loss, reg_loss = criterion(classification,
                                                       regression, anchors,
                                                       annot)

                        loss = cls_loss + reg_loss
                        if loss == 0 or not torch.isfinite(loss):
                            continue

                        loss_classification_ls.append(cls_loss.item())
                        loss_regression_ls.append(reg_loss.item())

                cls_loss = np.mean(loss_classification_ls)
                reg_loss = np.mean(loss_regression_ls)
                loss = cls_loss + reg_loss

                print(
                    'Val. Epoch: {}/{}. Classification loss: {:1.5f}. Regression loss: {:1.5f}. Total loss: {:1.5f}'
                    .format(epoch + 1, opt.num_epochs, cls_loss, reg_loss,
                            loss.mean()))
                writer.add_scalars('Total_loss', {'val': loss}, step)
                writer.add_scalars('Regression_loss', {'val': reg_loss}, step)
                writer.add_scalars('Classfication_loss', {'val': cls_loss},
                                   step)

                if loss + opt.es_min_delta < best_loss:
                    best_loss = loss
                    best_epoch = epoch

                    save_checkpoint(
                        model,
                        f'efficientdet-d{opt.compound_coef}_{epoch}_{step}.pth'
                    )

                    # onnx export is not tested.
                    # dummy_input = torch.rand(opt.batch_size, 3, 512, 512)
                    # if torch.cuda.is_available():
                    #     dummy_input = dummy_input.cuda()
                    # if isinstance(model, nn.DataParallel):
                    #     model.module.backbone_net.model.set_swish(memory_efficient=False)
                    #
                    #     torch.onnx.export(model.module, dummy_input,
                    #                       os.path.join(opt.saved_path, 'signatrix_efficientdet_coco.onnx'),
                    #                       verbose=False)
                    #     model.module.backbone_net.model.set_swish(memory_efficient=True)
                    # else:
                    #     model.backbone_net.model.set_swish(memory_efficient=False)
                    #
                    #     torch.onnx.export(model, dummy_input,
                    #                       os.path.join(opt.saved_path, 'signatrix_efficientdet_coco.onnx'),
                    #                       verbose=False)
                    #     model.backbone_net.model.set_swish(memory_efficient=True)

                # Early stopping
                if epoch - best_epoch > opt.es_patience > 0:
                    print(
                        'Stop training at epoch {}. The lowest loss achieved is {}'
                        .format(epoch, loss))
                    break
            writer.close()
        except KeyboardInterrupt:
            save_checkpoint(
                model, f'efficientdet-d{opt.compound_coef}_{epoch}_{step}.pth')
Exemplo n.º 20
0
def excuteModel(videoname):
    # Video's path
    # set int to use webcam, set str to read from a video file

    if videoname is not None:
        video_src = os.path.join(r'D:\GitHub\Detection\server\uploads', f"{videoname}.mp4")
    else:
        video_src = 'D:\\GitHub\\Detection\\server\AImodel\\videotest\\default.mp4'

    compound_coef = 2
    trained_weights = 'D:\\GitHub\\Detection\\server\\AImodel\\weights\\efficientdet-video.pth'

    force_input_size = None  # set None to use default size

    threshold = 0.2
    iou_threshold = 0.2

    use_cuda = True
    use_float16 = False
    cudnn.fastest = True
    cudnn.benchmark = True

    obj_list = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
                'fire hydrant', '', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep',
                'cow', 'elephant', 'bear', 'zebra', 'giraffe', '', 'backpack', 'umbrella', '', '', 'handbag', 'tie',
                'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove',
                'skateboard', 'surfboard', 'tennis racket', 'bottle', '', 'wine glass', 'cup', 'fork', 'knife', 'spoon',
                'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut',
                'cake', 'chair', 'couch', 'potted plant', 'bed', '', 'dining table', '', '', 'toilet', '', 'tv',
                'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink',
                'refrigerator', '', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier',
                'toothbrush']

    # tf bilinear interpolation is different from any other's, just make do
    input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536]
    input_size = input_sizes[compound_coef] if force_input_size is None else force_input_size

    # load model
    model = EfficientDetBackbone(
        compound_coef=compound_coef, num_classes=len(obj_list))
    model.load_state_dict(torch.load(trained_weights))

    model.requires_grad_(False)
    model.eval()

    if use_cuda:
        model = model.cuda()
    if use_float16:
        model = model.half()

    # function for display

    # Box
    regressBoxes = BBoxTransform()
    clipBoxes = ClipBoxes()

    # Video capture
    cap = cv2.VideoCapture(video_src)
    length = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    writer = None
    # try to determine the total number of frames in the video file
    try:
        prop = cv2.cv.CV_CAP_PROP_FRAME_COUNT if imutils.is_cv2() \
            else cv2.CAP_PROP_FRAME_COUNT
        total = int(vs.get(prop))
        print("[INFO] {} total frames in video".format(total))

    # an error occurred while trying to determine the total
    # number of frames in the video file
    except:
        print("[INFO] could not determine # of frames in video")
        total = -1

    path_out = os.path.join(os.path.dirname(
        os.path.abspath(__file__)), 'outvideo')

    path_result = r"D:\GitHub\Detection\server\AImodel\videotest\default.mp4"
    path_asset = r"D:\GitHub\Detection\client\src\assets"
    for i in range(0, length):
        ret, frame = cap.read()
        if not ret:
            break

        # frame preprocessing
        ori_imgs, framed_imgs, framed_metas = preprocess_video(
            frame, max_size=input_size)

        if use_cuda:
            x = torch.stack([torch.from_numpy(fi).cuda()
                             for fi in framed_imgs], 0)
        else:
            x = torch.stack([torch.from_numpy(fi) for fi in framed_imgs], 0)

        x = x.to(torch.float32 if not use_float16 else torch.float16).permute(
            0, 3, 1, 2)

        # model predict
        with torch.no_grad():
            features, regression, classification, anchors = model(x)

            out = postprocess(x,
                              anchors, regression, classification,
                              regressBoxes, clipBoxes,
                              threshold, iou_threshold)

        # result
        out = invert_affine(framed_metas, out)
        img_show = display(out, ori_imgs, obj_list)

        if writer is None:

            # initialize our video writer
            fourcc = 0x00000021
            #fourcc = cv2.VideoWriter_fourcc(*'mp4v')
            if videoname is not None:
                path_result = os.path.join(path_out, f"{videoname}.mp4")
            else:
                path_result = os.path.join(path_out, "default.mp4")

            writer = cv2.VideoWriter(path_result, fourcc, 30, (img_show.shape[1], img_show.shape[0]), True)


        # write the output frame to disk
        writer.write(img_show)
        print("Processing data... " + str(round((i+1)/length, 3)*100) + " %")
        # show frame by frame
        #cv2.imshow('frame', img_show)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    print("[INFO] cleaning up...")

    writer.release()
    cap.release()
    cv2.destroyAllWindows()

    if videoname is not None:
        path_asset = os.path.join(path_asset, f"{videoname}.mp4")
    else:
        path_asset = os.path.join(path_asset, "default.mp4")
    copyfile(path_result, path_asset)
    return path_asset
    def start_training(self):
        if self.system_dict["params"]["num_gpus"] == 0:
            os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

        if torch.cuda.is_available():
            torch.cuda.manual_seed(42)
        else:
            torch.manual_seed(42)

        self.system_dict["params"]["saved_path"] = self.system_dict["params"][
            "saved_path"] + "/" + self.system_dict["params"][
                "project_name"] + "/"
        self.system_dict["params"]["log_path"] = self.system_dict["params"][
            "log_path"] + "/" + self.system_dict["params"][
                "project_name"] + "/tensorboard/"
        os.makedirs(self.system_dict["params"]["saved_path"], exist_ok=True)
        os.makedirs(self.system_dict["params"]["log_path"], exist_ok=True)

        training_params = {
            'batch_size': self.system_dict["params"]["batch_size"],
            'shuffle': True,
            'drop_last': True,
            'collate_fn': collater,
            'num_workers': self.system_dict["params"]["num_workers"]
        }

        val_params = {
            'batch_size': self.system_dict["params"]["batch_size"],
            'shuffle': False,
            'drop_last': True,
            'collate_fn': collater,
            'num_workers': self.system_dict["params"]["num_workers"]
        }

        input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536]
        training_set = CocoDataset(
            self.system_dict["dataset"]["train"]["root_dir"],
            self.system_dict["dataset"]["train"]["coco_dir"],
            self.system_dict["dataset"]["train"]["img_dir"],
            set_dir=self.system_dict["dataset"]["train"]["set_dir"],
            transform=transforms.Compose([
                Normalizer(mean=self.system_dict["params"]["mean"],
                           std=self.system_dict["params"]["std"]),
                Augmenter(),
                Resizer(
                    input_sizes[self.system_dict["params"]["compound_coef"]])
            ]))
        training_generator = DataLoader(training_set, **training_params)

        if (self.system_dict["dataset"]["val"]["status"]):
            val_set = CocoDataset(
                self.system_dict["dataset"]["val"]["root_dir"],
                self.system_dict["dataset"]["val"]["coco_dir"],
                self.system_dict["dataset"]["val"]["img_dir"],
                set_dir=self.system_dict["dataset"]["val"]["set_dir"],
                transform=transforms.Compose([
                    Normalizer(self.system_dict["params"]["mean"],
                               self.system_dict["params"]["std"]),
                    Resizer(input_sizes[self.system_dict["params"]
                                        ["compound_coef"]])
                ]))
            val_generator = DataLoader(val_set, **val_params)

        print("")
        print("")
        model = EfficientDetBackbone(
            num_classes=len(self.system_dict["params"]["obj_list"]),
            compound_coef=self.system_dict["params"]["compound_coef"],
            ratios=eval(self.system_dict["params"]["anchors_ratios"]),
            scales=eval(self.system_dict["params"]["anchors_scales"]))

        os.makedirs("pretrained_weights", exist_ok=True)

        if (self.system_dict["params"]["compound_coef"] == 0):
            if (not os.path.isfile(
                    self.system_dict["params"]["load_weights"])):
                print("Downloading weights")
                cmd = "wget https://github.com/zylo117/Yet-Another-Efficient-Pytorch/releases/download/1.0/efficientdet-d0.pth -O " + \
                            self.system_dict["params"]["load_weights"]
                os.system(cmd)
        elif (self.system_dict["params"]["compound_coef"] == 1):
            if (not os.path.isfile(
                    self.system_dict["params"]["load_weights"])):
                print("Downloading weights")
                cmd = "wget https://github.com/zylo117/Yet-Another-Efficient-Pytorch/releases/download/1.0/efficientdet-d1.pth -O " + \
                            self.system_dict["params"]["load_weights"]
                os.system(cmd)
        elif (self.system_dict["params"]["compound_coef"] == 2):
            if (not os.path.isfile(
                    self.system_dict["params"]["load_weights"])):
                print("Downloading weights")
                cmd = "wget https://github.com/zylo117/Yet-Another-Efficient-Pytorch/releases/download/1.0/efficientdet-d2.pth -O " + \
                            self.system_dict["params"]["load_weights"]
                os.system(cmd)
        elif (self.system_dict["params"]["compound_coef"] == 3):
            if (not os.path.isfile(
                    self.system_dict["params"]["load_weights"])):
                print("Downloading weights")
                cmd = "wget https://github.com/zylo117/Yet-Another-Efficient-Pytorch/releases/download/1.0/efficientdet-d3.pth -O " + \
                            self.system_dict["params"]["load_weights"]
                os.system(cmd)
        elif (self.system_dict["params"]["compound_coef"] == 4):
            if (not os.path.isfile(
                    self.system_dict["params"]["load_weights"])):
                print("Downloading weights")
                cmd = "wget https://github.com/zylo117/Yet-Another-Efficient-Pytorch/releases/download/1.0/efficientdet-d4.pth -O " + \
                            self.system_dict["params"]["load_weights"]
                os.system(cmd)
        elif (self.system_dict["params"]["compound_coef"] == 5):
            if (not os.path.isfile(
                    self.system_dict["params"]["load_weights"])):
                print("Downloading weights")
                cmd = "wget https://github.com/zylo117/Yet-Another-Efficient-Pytorch/releases/download/1.0/efficientdet-d5.pth -O " + \
                            self.system_dict["params"]["load_weights"]
                os.system(cmd)
        elif (self.system_dict["params"]["compound_coef"] == 6):
            if (not os.path.isfile(
                    self.system_dict["params"]["load_weights"])):
                print("Downloading weights")
                cmd = "wget https://github.com/zylo117/Yet-Another-Efficient-Pytorch/releases/download/1.0/efficientdet-d6.pth -O " + \
                            self.system_dict["params"]["load_weights"]
                os.system(cmd)
        elif (self.system_dict["params"]["compound_coef"] == 7):
            if (not os.path.isfile(
                    self.system_dict["params"]["load_weights"])):
                print("Downloading weights")
                cmd = "wget https://github.com/zylo117/Yet-Another-Efficient-Pytorch/releases/download/1.0/efficientdet-d7.pth -O " + \
                            self.system_dict["params"]["load_weights"]
                os.system(cmd)

        # load last weights
        if self.system_dict["params"]["load_weights"] is not None:
            if self.system_dict["params"]["load_weights"].endswith('.pth'):
                weights_path = self.system_dict["params"]["load_weights"]
            else:
                weights_path = get_last_weights(
                    self.system_dict["params"]["saved_path"])
            try:
                last_step = int(
                    os.path.basename(weights_path).split('_')[-1].split('.')
                    [0])
            except:
                last_step = 0

            try:
                ret = model.load_state_dict(torch.load(weights_path),
                                            strict=False)
            except RuntimeError as e:
                print(f'[Warning] Ignoring {e}')
                print(
                    '[Warning] Don\'t panic if you see this, this might be because you load a pretrained weights with different number of classes. The rest of the weights should be loaded already.'
                )

            print(
                f'[Info] loaded weights: {os.path.basename(weights_path)}, resuming checkpoint from step: {last_step}'
            )
        else:
            last_step = 0
            print('[Info] initializing weights...')
            init_weights(model)

        print("")
        print("")

        # freeze backbone if train head_only
        if self.system_dict["params"]["head_only"]:

            def freeze_backbone(m):
                classname = m.__class__.__name__
                for ntl in ['EfficientNet', 'BiFPN']:
                    if ntl in classname:
                        for param in m.parameters():
                            param.requires_grad = False

            model.apply(freeze_backbone)
            print('[Info] freezed backbone')

        print("")
        print("")

        if self.system_dict["params"]["num_gpus"] > 1 and self.system_dict[
                "params"]["batch_size"] // self.system_dict["params"][
                    "num_gpus"] < 4:
            model.apply(replace_w_sync_bn)
            use_sync_bn = True
        else:
            use_sync_bn = False

        writer = SummaryWriter(
            self.system_dict["params"]["log_path"] +
            f'/{datetime.datetime.now().strftime("%Y%m%d-%H%M%S")}/')

        model = ModelWithLoss(model, debug=self.system_dict["params"]["debug"])

        if self.system_dict["params"]["num_gpus"] > 0:
            model = model.cuda()
            if self.system_dict["params"]["num_gpus"] > 1:
                model = CustomDataParallel(
                    model, self.system_dict["params"]["num_gpus"])
                if use_sync_bn:
                    patch_replication_callback(model)

        if self.system_dict["params"]["optim"] == 'adamw':
            optimizer = torch.optim.AdamW(model.parameters(),
                                          self.system_dict["params"]["lr"])
        else:
            optimizer = torch.optim.SGD(model.parameters(),
                                        self.system_dict["params"]["lr"],
                                        momentum=0.9,
                                        nesterov=True)

        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                               patience=3,
                                                               verbose=True)

        epoch = 0
        best_loss = 1e5
        best_epoch = 0
        step = max(0, last_step)
        model.train()

        num_iter_per_epoch = len(training_generator)

        try:
            for epoch in range(self.system_dict["params"]["num_epochs"]):
                last_epoch = step // num_iter_per_epoch
                if epoch < last_epoch:
                    continue

                epoch_loss = []
                progress_bar = tqdm(training_generator)
                for iter, data in enumerate(progress_bar):
                    if iter < step - last_epoch * num_iter_per_epoch:
                        progress_bar.update()
                        continue
                    try:
                        imgs = data['img']
                        annot = data['annot']

                        if self.system_dict["params"]["num_gpus"] == 1:
                            # if only one gpu, just send it to cuda:0
                            # elif multiple gpus, send it to multiple gpus in CustomDataParallel, not here
                            imgs = imgs.cuda()
                            annot = annot.cuda()

                        optimizer.zero_grad()
                        cls_loss, reg_loss = model(
                            imgs,
                            annot,
                            obj_list=self.system_dict["params"]["obj_list"])
                        cls_loss = cls_loss.mean()
                        reg_loss = reg_loss.mean()

                        loss = cls_loss + reg_loss
                        if loss == 0 or not torch.isfinite(loss):
                            continue

                        loss.backward()
                        # torch.nn.utils.clip_grad_norm_(model.parameters(), 0.1)
                        optimizer.step()

                        epoch_loss.append(float(loss))

                        progress_bar.set_description(
                            'Step: {}. Epoch: {}/{}. Iteration: {}/{}. Cls loss: {:.5f}. Reg loss: {:.5f}. Total loss: {:.5f}'
                            .format(step, epoch,
                                    self.system_dict["params"]["num_epochs"],
                                    iter + 1, num_iter_per_epoch,
                                    cls_loss.item(), reg_loss.item(),
                                    loss.item()))
                        writer.add_scalars('Loss', {'train': loss}, step)
                        writer.add_scalars('Regression_loss',
                                           {'train': reg_loss}, step)
                        writer.add_scalars('Classfication_loss',
                                           {'train': cls_loss}, step)

                        # log learning_rate
                        current_lr = optimizer.param_groups[0]['lr']
                        writer.add_scalar('learning_rate', current_lr, step)

                        step += 1

                        if step % self.system_dict["params"][
                                "save_interval"] == 0 and step > 0:
                            self.save_checkpoint(
                                model,
                                f'efficientdet-d{self.system_dict["params"]["compound_coef"]}_trained.pth'
                            )
                            #print('checkpoint...')

                    except Exception as e:
                        print('[Error]', traceback.format_exc())
                        print(e)
                        continue
                scheduler.step(np.mean(epoch_loss))

                if epoch % self.system_dict["params"][
                        "val_interval"] == 0 and self.system_dict["dataset"][
                            "val"]["status"]:
                    print("Running validation")
                    model.eval()
                    loss_regression_ls = []
                    loss_classification_ls = []
                    for iter, data in enumerate(val_generator):
                        with torch.no_grad():
                            imgs = data['img']
                            annot = data['annot']

                            if self.system_dict["params"]["num_gpus"] == 1:
                                imgs = imgs.cuda()
                                annot = annot.cuda()

                            cls_loss, reg_loss = model(
                                imgs,
                                annot,
                                obj_list=self.system_dict["params"]
                                ["obj_list"])
                            cls_loss = cls_loss.mean()
                            reg_loss = reg_loss.mean()

                            loss = cls_loss + reg_loss
                            if loss == 0 or not torch.isfinite(loss):
                                continue

                            loss_classification_ls.append(cls_loss.item())
                            loss_regression_ls.append(reg_loss.item())

                    cls_loss = np.mean(loss_classification_ls)
                    reg_loss = np.mean(loss_regression_ls)
                    loss = cls_loss + reg_loss

                    print(
                        'Val. Epoch: {}/{}. Classification loss: {:1.5f}. Regression loss: {:1.5f}. Total loss: {:1.5f}'
                        .format(epoch,
                                self.system_dict["params"]["num_epochs"],
                                cls_loss, reg_loss, loss))
                    writer.add_scalars('Loss', {'val': loss}, step)
                    writer.add_scalars('Regression_loss', {'val': reg_loss},
                                       step)
                    writer.add_scalars('Classfication_loss', {'val': cls_loss},
                                       step)

                    if loss + self.system_dict["params"][
                            "es_min_delta"] < best_loss:
                        best_loss = loss
                        best_epoch = epoch

                        self.save_checkpoint(
                            model,
                            f'efficientdet-d{self.system_dict["params"]["compound_coef"]}_trained.pth'
                        )

                    model.train()

                    # Early stopping
                    if epoch - best_epoch > self.system_dict["params"][
                            "es_patience"] > 0:
                        print(
                            '[Info] Stop training at epoch {}. The lowest loss achieved is {}'
                            .format(epoch, best_loss))
                        break
        except KeyboardInterrupt:
            self.save_checkpoint(
                model,
                f'efficientdet-d{self.system_dict["params"]["compound_coef"]}_trained.pth'
            )
            writer.close()
        writer.close()

        print("")
        print("")
        print("Training complete")
def batch_inference(args):
    input_size = input_sizes[args.compound_coef]
    model = EfficientDetBackbone(compound_coef=args.compound_coef,
                                 num_classes=len(obj_list),
                                 ratios=anchor_ratios,
                                 scales=anchor_scales)

    # load pth file
    model.load_state_dict(torch.load(args.pth, map_location='cpu'))
    model.requires_grad_(False)
    model.eval()

    if args.use_cuda:
        model = model.cuda(device=args.device)

    path = args.file_list
    imgpath = args.img_path
    content = []
    with open(path, 'r') as f_in:
        lines = f_in.readlines()
        for idx in range(len(lines)):
            line = lines[idx]
            line = line.strip().split(' ')
            content.append(line[0])

    for i in tqdm(range(len(content)), ncols=88):
        filebasename = content[i]
        img_path = os.path.join(imgpath, filebasename + '.jpg')
        try:
            ori_imgs, framed_imgs, framed_metas = eval_preprocess(
                img_path, max_size=input_size)
        except:
            f'{img_path.split("/")[-1]} is not in {args.img_path}'

        if args.use_cuda:
            x = torch.stack(
                [torch.from_numpy(fi).cuda() for fi in framed_imgs], 0)
        else:
            x = torch.stack([torch.from_numpy(fi) for fi in framed_imgs], 0)

        x = x.to(torch.float32).permute(0, 3, 1, 2)

        with torch.no_grad():
            features, regression, classification, anchors = model(x)

            regressBoxes = Rotation_BBoxTransform()
            clipBoxes = ClipBoxes()
            addBoxes = BBoxAddScores()

            out = postprocess(x, anchors, regression, classification,
                              regressBoxes, clipBoxes, addBoxes,
                              args.score_threshold, args.iou_threshold)
        out = invert_affine(framed_metas, out)
        file_name = ['Task1_large-vehicle.txt', 'Task1_small-vehicle.txt']
        rois = out[0]['rois']
        class_ids = out[0]['class_ids']
        scores = out[0]['scores']

        filecontent = []
        for ii in range(len(scores)):
            xmin, ymin, xmax, ymax, theta = rois[ii]
            rect = OPENCV2xywh([xmin, ymin, xmax, ymax, theta])[0].tolist()
            x1, y1 = float(rect[0][0]), float(rect[0][1])
            x2, y2 = float(rect[1][0]), float(rect[1][1])
            x3, y3 = float(rect[2][0]), float(rect[2][1])
            x4, y4 = float(rect[3][0]), float(rect[3][1])
            single_filecontent = [
                int(class_ids[ii]), filebasename,
                float(scores[ii]), x1, y1, x2, y2, x3, y3, x4, y4
            ]
            filecontent.append(single_filecontent)

        write_into_txt(file_name, filecontent)
Exemplo n.º 23
0
class PTVisionService(PTServingBaseService):
    def __init__(self, model_name, model_path):
        # 调用父类构造方法
        super(PTVisionService, self).__init__(model_name, model_path)
        # 调用自定义函数加载模型
        checkpoint_file = model_path
        params = yaml.safe_load(
            open(f'/home/mind/model/projects/{cfg.project}.yml'))
        self.model = EfficientDetBackbone(
            compound_coef=cfg.compound_coef,
            num_classes=len(cfg.category),
            ratios=eval(params['anchors_ratios']),
            scales=eval(params['anchors_scales']))
        self.model.load_state_dict(
            torch.load(checkpoint_file, map_location=torch.device('cpu')))
        self.model.requires_grad_(False)
        self.model.eval()
        # self.input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536]
        self.input_sizes = [512, 896, 768, 896, 1024, 1280, 1280, 1536]
        self.class_dict = dict([val, key] for key, val in cfg.category.items())

    def _preprocess(self, data):
        # https两种请求形式
        # 1. form-data文件格式的请求对应:data = {"请求key值":{"文件名":<文件io>}}
        # 2. json格式对应:data = json.loads("接口传入的json体")
        imgs_path = []
        for k, v in data.items():
            for file_name, file_content in v.items():
                imgs_path.append(file_content)

        return imgs_path

    def _inference(self, imgs_path):
        results = []
        regressBoxes = BBoxTransform()
        clipBoxes = ClipBoxes()
        for img_path in imgs_path:
            ori_imgs, framed_imgs, framed_metas = preprocess(
                [img_path], max_size=self.input_sizes[cfg.compound_coef])
            x = torch.from_numpy(framed_imgs[0]).float()
            x = x.unsqueeze(0).permute(0, 3, 1, 2)

            features, regression, classification, anchors = self.model(x)
            preds = self._my_postprocess(x, anchors, regression,
                                         classification, regressBoxes,
                                         clipBoxes, cfg.threshold,
                                         cfg.nms_threshold)

            preds = invert_affine(framed_metas, preds)[0]
            scores = preds['scores']
            class_ids = preds['class_ids']
            rois = preds['rois']
            image_result = {
                'detection_classes': [],
                'detection_boxes': [],
                'detection_scores': []
            }
            if rois.shape[0] > 0:
                bbox_score = scores

                for roi_id in range(rois.shape[0]):
                    score = float(bbox_score[roi_id])
                    label = int(class_ids[roi_id])
                    box = rois[roi_id, :]
                    image_result['detection_classes'].append(
                        self.class_dict[label + 1])
                    image_result['detection_boxes'].append(box.tolist())
                    image_result['detection_scores'].append(score)

            results.append(image_result)

        return results

    def _postprocess(self, data):
        if len(data) == 1:
            return data[0]
        else:
            return data

    def _my_postprocess(self, x, anchors, regression, classification,
                        regressBoxes, clipBoxes, threshold, iou_threshold):
        transformed_anchors = regressBoxes(anchors, regression)
        transformed_anchors = clipBoxes(transformed_anchors, x)
        scores = torch.max(classification, dim=2, keepdim=True)[0]
        scores_over_thresh = (scores > threshold)[:, :, 0]
        out = []
        for i in range(x.shape[0]):
            if scores_over_thresh[i].sum() == 0:
                out.append({
                    'rois': np.array(()),
                    'class_ids': np.array(()),
                    'scores': np.array(()),
                })
                continue

            classification_per = classification[i, scores_over_thresh[i, :],
                                                ...].permute(1, 0)
            transformed_anchors_per = transformed_anchors[
                i, scores_over_thresh[i, :], ...]
            scores_per = scores[i, scores_over_thresh[i, :], ...]
            scores_, classes_ = classification_per.max(dim=0)
            anchors_nms_idx = batched_nms(transformed_anchors_per,
                                          scores_per[:, 0],
                                          classes_,
                                          iou_threshold=iou_threshold)

            if anchors_nms_idx.shape[0] != 0:
                classes_ = classes_[anchors_nms_idx]
                scores_ = scores_[anchors_nms_idx]
                boxes_ = transformed_anchors_per[anchors_nms_idx, :]
                boxes_ = boxes_[:, [1, 0, 3, 2]]

                out.append({
                    'rois': boxes_.numpy(),
                    'class_ids': classes_.numpy(),
                    'scores': scores_.numpy(),
                })
            else:
                out.append({
                    'rois': np.array(()),
                    'class_ids': np.array(()),
                    'scores': np.array(()),
                })

        return out
Exemplo n.º 24
0
def train(opt):
    params = Params(f'projects/{opt.project}.yml')
    global_validation_it = 0

    if params.num_gpus == 0:
        os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

    if torch.cuda.is_available():
        torch.cuda.manual_seed(42)
    else:
        torch.manual_seed(42)

    opt.saved_path = opt.saved_path + f'/{params.project_name}/'
    opt.log_path = opt.log_path + f'/{params.project_name}/tensorboard/'
    os.makedirs(opt.log_path, exist_ok=True)
    os.makedirs(opt.saved_path, exist_ok=True)

    training_params = {
        'batch_size': opt.batch_size,
        'shuffle': True,
        'drop_last': True,
        'collate_fn': TUMuchTrafficDataset.collater,
        'num_workers': opt.num_workers
    }

    val_params = {
        'batch_size': opt.batch_size,
        'shuffle': False,
        'drop_last': True,
        'collate_fn': TUMuchTrafficDataset.collater,
        'num_workers': opt.num_workers
    }

    advprop = opt.advprop
    if advprop:  # for models using advprop pretrained weights
        normalize = transforms.Lambda(
            lambda mem: {
                "img": (mem["img"] * 2.0 - 1.0).astype(np.float32),
                "annot": mem["annot"]
            })
    else:  # for other models
        normalize = Normalizer(mean=[0.485, 0.456, 0.406],
                               std=[0.229, 0.224, 0.225])

    tfs = transforms.Compose([
        TopCutter(886),
        transforms.RandomApply([Negate()], p=0.1),
        transforms.RandomApply([ContrastEnhancementWithNoiseReduction()],
                               p=0.1),
        Resize(384),
        RandomCrop(384, 768), normalize,
        HorizontalFlip(prob=0.5),
        transforms.RandomApply([AddGaussianNoise(0, 2.55)], p=0.5),
        transforms.RandomApply([AddSaltAndPepperNoise(prob=0.0017)], p=0.5),
        ToTensor()
    ])
    tfrecord_paths = [opt.data_path
                      ] if opt.data_path.endswith(".tfrecord") else [
                          str(x.absolute())
                          for x in Path(opt.data_path).rglob('*.tfrecord')
                      ]
    training_set = TUMuchTrafficDataset(tfrecord_paths=tfrecord_paths,
                                        transform=tfs)
    training_generator = DataLoader(training_set, **training_params)

    tfrecord_paths = [opt.data_path
                      ] if opt.data_path.endswith(".tfrecord") else [
                          str(x.absolute())
                          for x in Path(opt.val_path).rglob('*.tfrecord')
                      ]
    val_set = TUMuchTrafficDataset(tfrecord_paths=tfrecord_paths,
                                   transform=tfs)
    val_generator = DataLoader(val_set, **val_params)

    if not opt.load_backbone:
        load_weights = False
    else:
        load_weights = True
    model = EfficientDetBackbone(num_classes=len(params.obj_list),
                                 compound_coef=opt.compound_coef,
                                 ratios=eval(params.anchors_ratios),
                                 scales=eval(params.anchors_scales),
                                 load_weights=load_weights)
    pytorch_total_params = sum(p.numel() for p in model.parameters())
    print("# Params: {:08d}".format(pytorch_total_params))

    # load last weights
    if opt.load_weights is not None:
        if opt.load_weights.endswith('.pth'):
            weights_path = opt.load_weights
        else:
            weights_path = get_last_weights(opt.saved_path)
        try:
            last_step = int(
                os.path.basename(weights_path).split('_')[-1].split('.')[0])
        except:
            last_step = 0

        try:
            ret = model.load_state_dict(torch.load(weights_path), strict=False)
        except RuntimeError as e:
            print(f'[Warning] Ignoring {e}')
            print(
                '[Warning] Don\'t panic if you see this, this might be because you load a pretrained weights with different number of classes. The rest of the weights should be loaded already.'
            )

        print(
            f'[Info] loaded weights: {os.path.basename(weights_path)}, resuming checkpoint from step: {last_step}'
        )
    else:
        last_step = 0
        print('[Info] initializing weights...')
        init_weights(model)

    # freeze backbone if train head_only
    if opt.head_only:

        def freeze_backbone(m):
            classname = m.__class__.__name__
            for ntl in ['EfficientNet', 'BiFPN']:
                if ntl in classname:
                    for param in m.parameters():
                        param.requires_grad = False

        model.apply(freeze_backbone)
        print('[Info] freezed backbone')

    # freeze backbone (only efficientnet) if train no_effnet
    if opt.no_effnet:

        def freeze_backbone(m):
            classname = m.__class__.__name__
            for ntl in ['EfficientNet']:
                if ntl in classname:
                    for param in m.parameters():
                        param.requires_grad = False

        model.apply(freeze_backbone)
        print('[Info] freezed backbone')
    pytorch_total_params = sum(p.numel() for p in model.parameters()
                               if p.requires_grad)
    print("# Training Parameters: {:06}".format(pytorch_total_params))

    # https://github.com/vacancy/Synchronized-BatchNorm-PyTorch
    # apply sync_bn when using multiple gpu and batch_size per gpu is lower than 4
    #  useful when gpu memory is limited.
    # because when bn is disable, the training will be very unstable or slow to converge,
    # apply sync_bn can solve it,
    # by packing all mini-batch across all gpus as one batch and normalize, then send it back to all gpus.
    # but it would also slow down the training by a little bit.
    if params.num_gpus > 1 and opt.batch_size // params.num_gpus < 4:
        model.apply(replace_w_sync_bn)
        use_sync_bn = True
    else:
        use_sync_bn = False

    writer = SummaryWriter(
        opt.log_path + f'/{datetime.datetime.now().strftime("%Y%m%d-%H%M")}/')

    # warp the model with loss function, to reduce the memory usage on gpu0 and speedup
    model = ModelWithLoss(model, debug=opt.debug)

    if params.num_gpus > 0:
        model = model.cuda()
        if params.num_gpus > 1:
            model = CustomDataParallel(model, params.num_gpus)
            if use_sync_bn:
                patch_replication_callback(model)

    if opt.optim == 'adamw':
        optimizer = torch.optim.AdamW(model.parameters(), opt.lr)
    else:
        optimizer = torch.optim.SGD(model.parameters(),
                                    opt.lr,
                                    momentum=0.9,
                                    nesterov=True)

    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           patience=1e6,
                                                           verbose=True)

    # use apex for mixed precision training
    # model, optimizer = amp.initialize(model, optimizer)

    epoch = 0
    best_loss = 1e5
    best_epoch = 0
    step = max(0, last_step)
    model.train()

    num_iter_per_epoch = len(training_generator)

    try:
        for epoch in range(opt.num_epochs):
            last_epoch = step // num_iter_per_epoch
            if epoch < last_epoch:
                continue

            epoch_loss = []
            progress_bar = tqdm(training_generator)
            for it, data in enumerate(progress_bar):
                if it < step - last_epoch * num_iter_per_epoch:
                    progress_bar.update()
                    continue
                try:
                    imgs = data['img']
                    annot = data['annot']

                    if params.num_gpus == 1:
                        # if only one gpu, just send it to cuda:0
                        # elif multiple gpus, send it to multiple gpus in CustomDataParallel, not here
                        imgs = imgs.cuda()
                        annot = annot.cuda()

                    global_validation_it += 1
                    optimizer.zero_grad()

                    cls_loss, reg_loss = model(imgs, annot)
                    cls_loss = cls_loss.mean()
                    reg_loss = reg_loss.mean()

                    loss = cls_loss + reg_loss
                    if loss == 0 or not torch.isfinite(loss):
                        continue

                    loss.backward()
                    # torch.nn.utils.clip_grad_norm_(model.parameters(), 0.1)
                    optimizer.step()

                    epoch_loss.append(float(loss))

                    progress_bar.set_description(
                        'Step: {}. Epoch: {}/{}. Iteration: {}/{}. Cls loss: {:.5f}. Reg loss: {:.5f}. Total loss: {:.5f}'
                        .format(step, epoch, opt.num_epochs, it + 1,
                                num_iter_per_epoch, cls_loss.item(),
                                reg_loss.item(), loss.item()))
                    writer.add_scalars('Loss', {'train': loss}, step)
                    writer.add_scalars('Regression_loss', {'train': reg_loss},
                                       step)
                    writer.add_scalars('Classfication_loss',
                                       {'train': cls_loss}, step)

                    # log learning_rate
                    current_lr = optimizer.param_groups[0]['lr']
                    writer.add_scalar('learning_rate', current_lr, step)

                    step += 1

                    if step % opt.save_interval == 0 and step > 0:
                        save_checkpoint(
                            model,
                            f'efficientdet-d{opt.compound_coef}_{epoch}_{step}.pth'
                        )
                        print('checkpoint...')

                except Exception as e:
                    print('[Error]', traceback.format_exc())
                    print(e)
                    continue
            scheduler.step(np.mean(epoch_loss))

            # sleep for 30 seconds, to reduce overheating
            import time
            time.sleep(30)

            if epoch % opt.val_interval == 0:
                model.eval()
                loss_regression_ls = []
                loss_classification_ls = []
                for it, data in enumerate(val_generator):
                    with torch.no_grad():
                        imgs = data['img']
                        annot = data['annot']
                        if params.num_gpus == 1:
                            # if only one gpu, just send it to cuda:0
                            # elif multiple gpus, send it to multiple gpus in CustomDataParallel, not here
                            imgs = imgs.cuda()
                            annot = annot.cuda()
                        if it < 12:
                            plot_tensorboard(imgs, annot, model, writer,
                                             global_validation_it, it, "")
                            global_validation_it += 1

                        if params.num_gpus == 1:
                            imgs = imgs.cuda()
                            annot = annot.cuda()

                        cls_loss, reg_loss = model(imgs,
                                                   annot,
                                                   obj_list=params.obj_list)
                        cls_loss = cls_loss.mean()
                        reg_loss = reg_loss.mean()

                        loss = cls_loss + reg_loss
                        if loss == 0 or not torch.isfinite(loss):
                            continue

                        loss_classification_ls.append(cls_loss.item())
                        loss_regression_ls.append(reg_loss.item())

                cls_loss = np.mean(loss_classification_ls)
                reg_loss = np.mean(loss_regression_ls)
                loss = cls_loss + reg_loss

                print(
                    'Val. Epoch: {}/{}. Classification loss: {:1.5f}. Regression loss: {:1.5f}. Total loss: {:1.5f}'
                    .format(epoch, opt.num_epochs, cls_loss, reg_loss, loss))
                writer.add_scalars('Loss', {'val': loss}, step)
                writer.add_scalars('Regression_loss', {'val': reg_loss}, step)
                writer.add_scalars('Classfication_loss', {'val': cls_loss},
                                   step)

                if loss + opt.es_min_delta < best_loss:
                    best_loss = loss
                    best_epoch = epoch

                    save_checkpoint(
                        model,
                        f'efficientdet-d{opt.compound_coef}_{epoch}_{step}.pth'
                    )

                model.train()

                # Early stopping
                if epoch - best_epoch > opt.es_patience > 0:
                    print(
                        '[Info] Stop training at epoch {}. The lowest loss achieved is {}'
                        .format(epoch, best_loss))
                    break
    except KeyboardInterrupt:
        save_checkpoint(
            model, f'efficientdet-d{opt.compound_coef}_{epoch}_{step}.pth')
        writer.close()
    writer.close()

color_list = standard_to_bgr(STANDARD_COLORS)
# tf bilinear interpolation is different from any other's, just make do
input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536]
input_size = input_sizes[compound_coef] if force_input_size is None else force_input_size



# Load model
print("Loading Model ...");
model = EfficientDetBackbone(compound_coef=compound_coef, num_classes=len(obj_list),
                             ratios=anchor_ratios, scales=anchor_scales)
model.load_state_dict(torch.load(f'weights/efficientdet-d{compound_coef}.pth'))
model.requires_grad_(False)
model = model.eval()


if use_cuda:
    model = model.cuda()
if use_float16:
    model = model.half()



print("Running Inference on Image ...");
ori_imgs, framed_imgs, framed_metas = preprocess(img_path, max_size=input_size)

if use_cuda:
    x = torch.stack([torch.from_numpy(fi).cuda() for fi in framed_imgs], 0)
else:
Exemplo n.º 26
0
class EfficientDet(object):
    obj_list = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
                'train', 'truck', 'boat', 'traffic light',
                'fire hydrant', '', 'stop sign', 'parking meter', 'bench',
                'bird', 'cat', 'dog', 'horse', 'sheep',
                'cow', 'elephant', 'bear', 'zebra', 'giraffe', '', 'backpack',
                'umbrella', '', '', 'handbag', 'tie',
                'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
                'kite', 'baseball bat', 'baseball glove',
                'skateboard', 'surfboard', 'tennis racket', 'bottle', '',
                'wine glass', 'cup', 'fork', 'knife', 'spoon',
                'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli',
                'carrot', 'hot dog', 'pizza', 'donut',
                'cake', 'chair', 'couch', 'potted plant', 'bed', '',
                'dining table', '', '', 'toilet', '', 'tv',
                'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
                'microwave', 'oven', 'toaster', 'sink',
                'refrigerator', '', 'book', 'clock', 'vase', 'scissors',
                'teddy bear', 'hair drier', 'toothbrush']

    def __init__(self, weightfile, score_thresh,
                 nms_thresh, is_xywh=True, use_cuda=True, use_float16=False):
        print('Loading weights from %s... Done!' % (weightfile))

        # constants
        self.score_thresh = score_thresh
        self.nms_thresh = nms_thresh
        self.use_cuda = use_cuda
        self.is_xywh = is_xywh

        compound_coef = 0
        force_input_size = None  # set None to use default size

        self.use_float16 = False
        cudnn.fastest = True
        cudnn.benchmark = True

        # tf bilinear interpolation is different from any other's, just make do
        input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536]
        self.input_size = input_sizes[compound_coef] if \
            force_input_size is None else force_input_size

        # load model
        self.model = EfficientDetBackbone(compound_coef=compound_coef,
                                          num_classes=len(self.obj_list))
        # f'weights/efficientdet-d{compound_coef}.pth'
        self.model.load_state_dict(torch.load(weightfile))
        self.model.requires_grad_(False)
        self.model.eval()

        if self.use_cuda:
            self.model = self.model.cuda()
        if self.use_float16:
            self.model = self.model.half()

        # Box
        self.regressBoxes = BBoxTransform()
        self.clipBoxes = ClipBoxes()

    def __call__(self, imgs):
        # frame preprocessing
        _, framed_imgs, framed_metas = preprocess(imgs,
                                                  max_size=self.input_size)

        if self.use_cuda:
            x = torch.stack([torch.from_numpy(fi).cuda() for fi in framed_imgs], 0)
        else:
            x = torch.stack([torch.from_numpy(fi) for fi in framed_imgs], 0)

        dtype = torch.float32 if not self.use_float16 else torch.float16
        x = x.to(dtype).permute(0, 3, 1, 2)

        # model predict
        with torch.no_grad():
            features, regression, classification, anchors = self.model(x)

            out = postprocess(x,
                              anchors, regression, classification,
                              self.regressBoxes, self.clipBoxes,
                              self.score_thresh, self.nms_thresh)

        # result
        out = invert_affine(framed_metas, out)

        if len(out) == 0:
            return None, None, None

        rois = [o['rois'] for o in out]
        scores = [o['scores'] for o in out]
        class_ids = [o['class_ids'] for o in out]
        if self.is_xywh:
            return xyxy_to_xywh(rois), scores, class_ids
        else:
            return rois, scores, class_ids
    coco_eval.summarize()


if __name__ == '__main__':
    SET_NAME = params['val_set']
    VAL_GT = f'datasets/{params["project_name"]}/{SET_NAME}.json'
    VAL_IMGS = f'datasets/{params["project_name"]}/{SET_NAME}/{SET_NAME}'
    MAX_IMAGES = 10000
    coco_gt = COCO(VAL_GT)
    image_ids = coco_gt.getImgIds()[:MAX_IMAGES]

    if override_prev_results or not os.path.exists(
            f'{SET_NAME}_bbox_results.json'):
        model = EfficientDetBackbone(compound_coef=compound_coef,
                                     num_classes=len(obj_list),
                                     ratios=eval(params['anchors_ratios']),
                                     scales=eval(params['anchors_scales']))
        model.load_state_dict(torch.load(weights_path))
        model.requires_grad_(False)
        model.eval()

        if use_cuda:
            model.cuda(gpu)

            if use_float16:
                model.half()

        evaluate_coco(VAL_IMGS, SET_NAME, image_ids, coco_gt, model)

    # _eval(coco_gt, image_ids, f'{SET_NAME}_bbox_results.json')
Exemplo n.º 28
0
class Model():
    def __init__(self, compound_coef=0, force_input_size=512, threshold=0.2, iou_threshold=0.2):
        self.compound_coef = compound_coef
        self.force_input_size = force_input_size  # set None to use default size

        self.threshold = threshold
        self.iou_threshold = iou_threshold

        self.use_cuda = True
        self.use_float16 = False
        cudnn.fastest = True
        cudnn.benchmark = True

        self.obj_list = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
                         'fire hydrant', '', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep',
                         'cow', 'elephant', 'bear', 'zebra', 'giraffe', '', 'backpack', 'umbrella', '', '', 'handbag', 'tie',
                         'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove',
                         'skateboard', 'surfboard', 'tennis racket', 'bottle', '', 'wine glass', 'cup', 'fork', 'knife', 'spoon',
                         'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut',
                         'cake', 'chair', 'couch', 'potted plant', 'bed', '', 'dining table', '', '', 'toilet', '', 'tv',
                         'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink',
                         'refrigerator', '', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier',
                         'toothbrush']

        # tf bilinear interpolation is different from any other's, just make do
        self.input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536]
        self.input_size = self.input_sizes[self.compound_coef] if self.force_input_size is None else self.force_input_size

        self.model = EfficientDetBackbone(
            compound_coef=self.compound_coef, num_classes=len(self.obj_list))
        self.model.load_state_dict(torch.load(
            f'weights/efficientdet-d{self.compound_coef}.pth'))
        self.model.requires_grad_(False)
        self.model.eval()

        if self.use_cuda:
            self.model = self.model.cuda()
        if self.use_float16:
            self.model = self.model.half()


    def predict(self, raw_img):
        self.ori_imgs, self.framed_imgs, self.framed_metas = preprocess_raw(raw_img, max_size=self.input_size)
        if self.use_cuda:
            x = torch.stack([torch.from_numpy(fi).cuda() for fi in self.framed_imgs], 0)
        else:
            x = torch.stack([torch.from_numpy(fi) for fi in self.framed_imgs], 0)
        x = x.to(torch.float32 if not self.use_float16 else torch.float16).permute(0, 3, 1, 2)

        with torch.no_grad():
            self.features, self.regression, self.classification, self.anchors = self.model(x)

            self.regressBoxes = BBoxTransform()
            self.clipBoxes = ClipBoxes()

            out = postprocess(x,
                            self.anchors, self.regression, self.classification,
                            self.regressBoxes, self.clipBoxes,
                            self.threshold, self.iou_threshold)
            pred = invert_affine(self.framed_metas, out)
            return pred



    def label_img(self, preds, imgs):
        for i in range(len(imgs)):
            if len(preds[i]['rois']) == 0:
                continue

            for j in range(len(preds[i]['rois'])):
                (x1, y1, x2, y2) = preds[i]['rois'][j].astype(np.int)
                cv2.rectangle(imgs[i], (x1, y1), (x2, y2), (255, 255, 0), 2)
                obj = self.obj_list[preds[i]['class_ids'][j]]
                score = float(preds[i]['scores'][j])

                cv2.putText(imgs[i], '{}, {:.3f}'.format(obj, score),
                            (x1, y1 + 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                            (255, 255, 0), 1)
        
        return imgs
    

    def run(self, raw_img):
        pred_label = self.predict(raw_img)
        pred_img = self.label_img(pred_label, self.ori_imgs)
        return pred_img[0]
Exemplo n.º 29
0
class ObjectDetectionService():
    def __init__(self, model_name, model_path):
        # effdet
        self.model_name = model_name
        self.model_path = model_path
        self.input_image_key = 'images'
        self.anchor_ratios = [(1.0, 1.0), (1.4, 0.7), (0.7, 1.4)]
        self.anchor_scales = [2**0, 2**(1.0 / 3.0), 2**(2.0 / 3.0)]
        self.compound_coef = 0
        self.threshold = 0.5
        self.iou_threshold = 0.5
        self.obj_list = [
            '一次性快餐盒', '书籍纸张', '充电宝', '剩饭剩菜', '包', '垃圾桶', '塑料器皿', '塑料玩具',
            '塑料衣架', '大骨头', '干电池', '快递纸袋', '插头电线', '旧衣服', '易拉罐', '枕头', '果皮果肉',
            '毛绒玩具', '污损塑料', '污损用纸', '洗护用品', '烟蒂', '牙签', '玻璃器皿', '砧板', '筷子',
            '纸盒纸箱', '花盆', '茶叶渣', '菜帮菜叶', '蛋壳', '调料瓶', '软膏', '过期药物', '酒瓶',
            '金属厨具', '金属器皿', '金属食品罐', '锅', '陶瓷器皿', '鞋', '食用油桶', '饮料瓶', '鱼骨'
        ]
        self.input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536]
        self.input_size = self.input_sizes[self.compound_coef]

        self.model = EfficientDetBackbone(compound_coef=self.compound_coef,
                                          num_classes=len(self.obj_list),
                                          ratios=self.anchor_ratios,
                                          scales=self.anchor_scales)

        self.model.load_state_dict(torch.load(self.model_path), strict=False)
        self.model.requires_grad_(False)
        self.model.eval()

    def _preprocess(self, data):
        preprocessed_data = {}
        for k, v in data.items():
            for file_name, file_content in v.items():
                ori_imgs, framed_imgs, framed_metas = preprocess(
                    file_content, max_size=self.input_size)
                preprocessed_data[k] = [framed_imgs, framed_metas]
        return preprocessed_data

    def _inference(self, data):
        """
        model inference function
        Here are a inference example of resnet, if you use another model, please modify this function
        """
        framed_imgs, framed_metas = data[self.input_image_key]
        if torch.cuda.is_available():
            x = torch.stack(
                [torch.from_numpy(fi).cuda() for fi in framed_imgs], 0)
            self.model = self.model.cuda()
        else:
            x = torch.stack([torch.from_numpy(fi) for fi in framed_imgs], 0)

        x = x.to(torch.float32).permute(0, 3, 1, 2)

        #if use_float16:
        #    model = model.half()

        with torch.no_grad():
            features, regression, classification, anchors = self.model(x)

            regressBoxes = BBoxTransform()
            clipBoxes = ClipBoxes()

            out = postprocess(x, anchors, regression, classification,
                              regressBoxes, clipBoxes, self.threshold,
                              self.iou_threshold)

        out = invert_affine(framed_metas, out)
        result = OrderedDict()
        result['detection_classes'] = []
        result['detection_scores'] = []
        result['detection_boxes'] = []

        for i in range(len(out)):
            if len(out[i]['rois']) == 0:
                continue
            for j in range(len(out[i]['rois'])):
                x1, y1, x2, y2 = out[i]['rois'][j].astype(np.int)
                result['detection_boxes'].append([x1, y1, x2, y2])
                obj = self.obj_list[out[i]['class_ids'][j]]
                result['detection_classes'].append(obj)
                score = float(out[i]['scores'][j])
                result['detection_scores'].append(score)

        return result

    def _postprocess(self, data):
        return data

    def inference(self, data):
        '''
        Wrapper function to run preprocess, inference and postprocess functions.

        Parameters
        ----------
        data : map of object
            Raw input from request.

        Returns
        -------
        list of outputs to be sent back to client.
            data to be sent back
        '''
        pre_start_time = time.time()
        data = self._preprocess(data)
        infer_start_time = time.time()
        # Update preprocess latency metric
        pre_time_in_ms = (infer_start_time - pre_start_time) * 1000
        print('preprocess time: ' + str(pre_time_in_ms) + 'ms')

        data = self._inference(data)
        infer_end_time = time.time()
        infer_in_ms = (infer_end_time - infer_start_time) * 1000

        print('infer time: ' + str(infer_in_ms) + 'ms')
        data = self._postprocess(data)

        # Update inference latency metric
        post_time_in_ms = (time.time() - infer_end_time) * 1000
        print('postprocess time: ' + str(post_time_in_ms) + 'ms')

        print('latency: ' +
              str(pre_time_in_ms + infer_in_ms + post_time_in_ms) + 'ms')
        data['latency_time'] = str(
            round(pre_time_in_ms + infer_in_ms + post_time_in_ms, 1)) + ' ms'
        return data

    '''
def train(opt):
    params = Params(f'projects/{opt.project}.yml')

    # Neptune staff
    all_params = opt.__dict__
    all_params.update(params.params)

    data_path = os.path.join(opt.data_path, params.project_name)

    tags = [
        'EfficientDet', f'D{opt.compound_coef}', f'bs{opt.batch_size}',
        opt.optim
    ]
    if opt.head_only:
        tags.append('head_only')

    if len(params.obj_list) == 1:
        tags.append('one_class')

    if opt.no_aug:
        tags.append('no_aug')

    neptune.create_experiment(name='EfficientDet',
                              tags=tags,
                              params=all_params,
                              upload_source_files=['train.py', 'coco_eval.py'])
    log_data_version(data_path)

    if params.num_gpus == 0:
        os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

    if torch.cuda.is_available():
        torch.cuda.manual_seed(42)
    else:
        torch.manual_seed(42)

    opt.saved_path = os.path.join(opt.saved_path, params.project_name)
    opt.log_path = os.path.join(opt.log_path, params.project_name,
                                'tensorboard/')
    os.makedirs(opt.log_path, exist_ok=True)
    os.makedirs(opt.saved_path, exist_ok=True)

    training_params = {
        'batch_size': opt.batch_size,
        'shuffle': True,
        'drop_last': True,
        'collate_fn': collater,
        'num_workers': opt.num_workers
    }

    val_params = {
        'batch_size': opt.batch_size,
        'shuffle': False,
        'drop_last': True,
        'collate_fn': collater,
        'num_workers': opt.num_workers
    }

    input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536, 1536]
    if opt.no_aug:
        transform_list = [
            Normalizer(mean=params.mean, std=params.std),
            Resizer(input_sizes[opt.compound_coef])
        ]
    else:
        transform_list = [
            Normalizer(mean=params.mean, std=params.std),
            Augmenter(),
            Resizer(input_sizes[opt.compound_coef])
        ]

    training_set = CocoDataset(root_dir=os.path.join(opt.data_path,
                                                     params.project_name),
                               set=params.train_set,
                               transform=transforms.Compose(transform_list))
    training_generator = DataLoader(training_set, **training_params)

    val_set = CocoDataset(root_dir=os.path.join(opt.data_path,
                                                params.project_name),
                          set=params.val_set,
                          transform=transforms.Compose([
                              Normalizer(mean=params.mean, std=params.std),
                              Resizer(input_sizes[opt.compound_coef])
                          ]))
    val_generator = DataLoader(val_set, **val_params)

    model = EfficientDetBackbone(num_classes=len(params.obj_list),
                                 compound_coef=opt.compound_coef,
                                 ratios=eval(params.anchors_ratios),
                                 scales=eval(params.anchors_scales))

    # load last weights
    if opt.load_weights is not None:
        if opt.load_weights.endswith('.pth'):
            weights_path = opt.load_weights
        else:
            weights_path = get_last_weights(opt.saved_path)
        try:
            last_step = int(
                os.path.basename(weights_path).split('_')[-1].split('.')[0])
        except:
            last_step = 0

        try:
            ret = model.load_state_dict(torch.load(weights_path), strict=False)
        except RuntimeError as e:
            print(f'[Warning] Ignoring {e}')
            print(
                '[Warning] Don\'t panic if you see this, this might be because you load a pretrained weights with different number of classes. The rest of the weights should be loaded already.'
            )

        print(
            f'[Info] loaded weights: {os.path.basename(weights_path)}, resuming checkpoint from step: {last_step}'
        )
    else:
        last_step = 0
        print('[Info] initializing weights...')
        init_weights(model)

    # freeze backbone if train head_only
    if opt.head_only:

        def freeze_backbone(m):
            classname = m.__class__.__name__
            for ntl in ['EfficientNet', 'BiFPN']:
                if ntl in classname:
                    for param in m.parameters():
                        param.requires_grad = False

        model.apply(freeze_backbone)
        print('[Info] freezed backbone')

    # https://github.com/vacancy/Synchronized-BatchNorm-PyTorch
    # apply sync_bn when using multiple gpu and batch_size per gpu is lower than 4
    #  useful when gpu memory is limited.
    # because when bn is disable, the training will be very unstable or slow to converge,
    # apply sync_bn can solve it,
    # by packing all mini-batch across all gpus as one batch and normalize, then send it back to all gpus.
    # but it would also slow down the training by a little bit.
    if params.num_gpus > 1 and opt.batch_size // params.num_gpus < 4:
        model.apply(replace_w_sync_bn)
        use_sync_bn = True
    else:
        use_sync_bn = False

    writer = SummaryWriter(
        opt.log_path +
        f'/{datetime.datetime.now().strftime("%Y%m%d-%H%M%S")}/')

    # warp the model with loss function, to reduce the memory usage on gpu0 and speedup
    model = ModelWithLoss(model, debug=opt.debug)

    if params.num_gpus > 0:
        model = model.cuda()
        if params.num_gpus > 1:
            model = CustomDataParallel(model, params.num_gpus)
            if use_sync_bn:
                patch_replication_callback(model)

    if opt.optim == 'adamw':
        optimizer = torch.optim.AdamW(model.parameters(), opt.lr)
    else:
        optimizer = torch.optim.SGD(model.parameters(),
                                    opt.lr,
                                    momentum=opt.momentum,
                                    nesterov=True)

    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           patience=3,
                                                           verbose=True)

    epoch = 0
    best_loss = 1e5
    best_epoch = 0
    best_step = 0
    best_checkpoint = None
    step = max(0, last_step)
    model.train()

    num_iter_per_epoch = len(training_generator)

    try:
        for epoch in range(opt.num_epochs):
            last_epoch = step // num_iter_per_epoch
            if epoch < last_epoch:
                continue

            epoch_loss = []
            epoch_cls_loss = []
            epoch_reg_loss = []

            if epoch % opt.val_interval == 0:
                model.eval()
                loss_regression_ls = []
                loss_classification_ls = []
                for iter, data in enumerate(val_generator):
                    with torch.no_grad():
                        imgs = data['img']
                        annot = data['annot']

                        if params.num_gpus == 1:
                            imgs = imgs.cuda()
                            annot = annot.cuda()

                        cls_loss, reg_loss = model(imgs,
                                                   annot,
                                                   obj_list=params.obj_list)
                        cls_loss = cls_loss.mean()
                        reg_loss = reg_loss.mean()

                        loss = cls_loss + reg_loss
                        if loss == 0 or not torch.isfinite(loss):
                            continue

                        loss_classification_ls.append(cls_loss.item())
                        loss_regression_ls.append(reg_loss.item())

                cls_loss = np.mean(loss_classification_ls)
                reg_loss = np.mean(loss_regression_ls)
                loss = cls_loss + reg_loss

                print(
                    'Val. Epoch: {}/{}. Classification loss: {:1.5f}. Regression loss: {:1.5f}. Total loss: {:1.5f}'
                    .format(epoch, opt.num_epochs, cls_loss, reg_loss, loss))
                writer.add_scalars('Loss', {'val': loss}, step)
                writer.add_scalars('Regression Loss', {'val': reg_loss}, step)
                writer.add_scalars('Classfication Loss', {'val': cls_loss},
                                   step)

                neptune.log_metric('Val Loss', step, loss)
                neptune.log_metric('Val Regression Loss', step, reg_loss)
                neptune.log_metric('Val Classification Loss', step, cls_loss)

                with torch.no_grad():
                    stats = evaluate(model.model,
                                     params.params,
                                     threshold=opt.val_threshold,
                                     step=step)

                neptune.log_metric('AP at IoU=.50:.05:.95', step, stats[0])
                neptune.log_metric('AP at IoU=.50', step, stats[1])
                neptune.log_metric('AP at IoU=.75', step, stats[2])
                neptune.log_metric('AR given 1 detection per image', step,
                                   stats[6])
                neptune.log_metric('AR given 10 detection per image', step,
                                   stats[7])
                neptune.log_metric('AR given 100 detection per image', step,
                                   stats[8])

                if loss + opt.es_min_delta < best_loss:
                    best_loss = loss
                    best_epoch = epoch
                    best_step = step
                    checkpoint_name = f'efficientdet-d{opt.compound_coef}_{epoch}_{step}.pth'
                    checkpoint_path = save_checkpoint(model, opt.saved_path,
                                                      checkpoint_name)
                    best_checkpoint = checkpoint_path

                model.train()

            progress_bar = tqdm(training_generator)
            for iter, data in enumerate(progress_bar):
                if iter < step - last_epoch * num_iter_per_epoch:
                    progress_bar.update()
                    continue
                try:
                    imgs = data['img']
                    annot = data['annot']

                    if params.num_gpus == 1:
                        # if only one gpu, just send it to cuda:0
                        # elif multiple gpus, send it to multiple gpus in CustomDataParallel, not here
                        imgs = imgs.cuda()
                        annot = annot.cuda()

                    optimizer.zero_grad()
                    cls_loss, reg_loss = model(imgs,
                                               annot,
                                               obj_list=params.obj_list,
                                               step=step)
                    cls_loss = cls_loss.mean()
                    reg_loss = reg_loss.mean()

                    loss = cls_loss + reg_loss
                    if loss == 0 or not torch.isfinite(loss):
                        continue

                    loss.backward()
                    # torch.nn.utils.clip_grad_norm_(model.parameters(), 0.1)
                    optimizer.step()

                    epoch_loss.append(float(loss))
                    epoch_cls_loss.append(float(cls_loss))
                    epoch_reg_loss.append(float(reg_loss))

                    progress_bar.set_description(
                        'Step: {}. Epoch: {}/{}. Iteration: {}/{}. Cls loss: {:.5f}. Reg loss: {:.5f}. Total loss: {:.5f}'
                        .format(step, epoch, opt.num_epochs, iter + 1,
                                num_iter_per_epoch, cls_loss.item(),
                                reg_loss.item(), loss.item()))
                    writer.add_scalars('Loss', {'train': loss}, step)
                    writer.add_scalars('Regression_loss', {'train': reg_loss},
                                       step)
                    writer.add_scalars('Classfication_loss',
                                       {'train': cls_loss}, step)

                    neptune.log_metric('Train Loss', step, loss)
                    neptune.log_metric('Train Regression Loss', step, reg_loss)
                    neptune.log_metric('Train Classification Loss', step,
                                       cls_loss)

                    # log learning_rate
                    current_lr = optimizer.param_groups[0]['lr']
                    writer.add_scalar('learning_rate', current_lr, step)
                    neptune.log_metric('Learning Rate', step, current_lr)

                    step += 1

                    if step % opt.save_interval == 0 and step > 0:
                        save_checkpoint(
                            model, opt.saved_path,
                            f'efficientdet-d{opt.compound_coef}_{epoch}_{step}.pth'
                        )
                        print('checkpoint...')

                except Exception as e:
                    print('[Error]', traceback.format_exc())
                    print(e)
                    continue

            scheduler.step(np.mean(epoch_loss))
            neptune.log_metric('Epoch Loss', step, np.mean(epoch_loss))
            neptune.log_metric('Epoch Classification Loss', step,
                               np.mean(epoch_cls_loss))
            neptune.log_metric('Epoch Regression Loss', step,
                               np.mean(epoch_reg_loss))

            # Early stopping
            if epoch - best_epoch > opt.es_patience > 0:
                print(
                    '[Info] Stop training at epoch {}. The lowest loss achieved is {}'
                    .format(epoch, best_loss))
                break

    except KeyboardInterrupt:
        save_checkpoint(
            model, opt.saved_path,
            f'efficientdet-d{opt.compound_coef}_{epoch}_{step}.pth')
        send_best_checkpoint(best_checkpoint, best_step)
        writer.close()
    writer.close()
    send_best_checkpoint(best_checkpoint, best_step)
    neptune.stop()