示例#1
0
def eval_model(weight_path="weights/gfocalV2_resnet50_best_map.pth",
               device="cuda:0"):
    """Run a GFocal checkpoint over the COCO validation images and score it.

    The model config is read from ``config/gfocal.yaml``; the ``'ema'`` entry
    of the checkpoint is loaded and the network is run in half precision.
    Detections are written to ``predicts.json`` in COCO result format and
    then handed to ``coco_eavl``.

    Args:
        weight_path: Path to a checkpoint whose ``'ema'`` key holds the
            state dict to load.
        device: Device string passed to ``torch.device``.

    Raises:
        FileNotFoundError: If a validation image cannot be read by OpenCV.
    """
    from pycocotools.coco import COCO
    device = torch.device(device)
    with open("config/gfocal.yaml", 'r') as rf:
        cfg = yaml.safe_load(rf)
    net = GFocal(**{
        **cfg['model'], 'pretrained': False,
        "nms_iou_thresh": 0.6
    })
    net.load_state_dict(torch.load(weight_path, map_location="cpu")['ema'])
    net.to(device)
    net.eval().half()
    data_cfg = cfg['data']
    basic_transform = RandScaleToMax(max_threshes=[data_cfg['max_thresh']])
    coco = COCO(data_cfg['val_annotation_path'])
    coco_predict_list = list()
    time_logger = AverageLogger()
    pbar = tqdm(coco.imgs.keys())
    for img_id in pbar:
        file_name = coco.imgs[img_id]['file_name']
        img_path = os.path.join(data_cfg['val_img_root'], file_name)
        img = cv.imread(img_path)
        # cv.imread signals failure by returning None instead of raising.
        if img is None:
            raise FileNotFoundError(
                "failed to read image: {:s}".format(img_path))
        h, w, _ = img.shape
        img, ratio, (left, top) = basic_transform.make_border(
            img, max_thresh=data_cfg['max_thresh'], border_val=(103, 116, 123))
        # BGR -> RGB, scale to [0, 1], then normalise with rgb_mean / rgb_std.
        img_inp = (img[:, :, ::-1] / 255.0 -
                   np.array(rgb_mean)) / np.array(rgb_std)
        # HWC -> NCHW with a leading batch dimension of 1, in fp16.
        img_inp = torch.from_numpy(img_inp).unsqueeze(0).permute(
            0, 3, 1, 2).contiguous().float().to(device).half()
        # NOTE(review): CUDA kernels launch asynchronously, so without a
        # torch.cuda.synchronize() this wall-clock timing may under-report
        # the real inference latency -- confirm whether that matters here.
        tic = time.time()
        # Inference only: no autograd graph is needed.
        with torch.no_grad():
            predict = net(img_inp)["predicts"][0]
        duration = time.time() - tic
        time_logger.update(duration)
        pbar.set_description("fps:{:4.2f}".format(1 / time_logger.avg()))
        if predict is None:
            continue
        # Undo the border offset and the resize ratio, then clip x to
        # [0, w] and y to [0, h] of the original image.
        predict[:, [0, 2]] = ((predict[:, [0, 2]] - left) / ratio).clamp(min=0,
                                                                         max=w)
        predict[:, [1, 3]] = ((predict[:, [1, 3]] - top) / ratio).clamp(min=0,
                                                                        max=h)
        box = predict.cpu().numpy()
        # coco_box is a view of box: columns 2:4 become width/height in
        # place; columns 4 (score) and 5 (class index) are untouched.
        coco_box = box[:, :4]
        coco_box[:, 2:] = coco_box[:, 2:] - coco_box[:, :2]
        for p, b in zip(box.tolist(), coco_box.tolist()):
            coco_predict_list.append({
                'image_id': img_id,
                'category_id': coco_ids[int(p[5])],
                'bbox': [round(x, 3) for x in b],
                'score': round(p[4], 5)
            })
    with open("predicts.json", 'w') as file:
        json.dump(coco_predict_list, file)
    coco_eavl(anno_path=data_cfg['val_annotation_path'],
              pred_path="predicts.json")
示例#2
0
def eval_model(weight_path="weights/faster_rcnn_resnet50_last.pth",
               device="cuda:4"):
    """Run a Faster R-CNN checkpoint over the COCO validation images and score it.

    The model config is read from ``config/faster.yaml`` and the ``'ema'``
    entry of the checkpoint is loaded. Each image is rescaled, pasted into
    the top-left corner of a square canvas and normalised before inference.
    Detections are written to ``predicts.json`` in COCO result format and
    then handed to ``coco_eavl``.

    Args:
        weight_path: Path to a checkpoint whose ``'ema'`` key holds the
            state dict to load.
        device: Device string passed to ``torch.device``.

    Raises:
        FileNotFoundError: If a validation image cannot be read by OpenCV.
    """
    from pycocotools.coco import COCO
    device = torch.device(device)
    with open("config/faster.yaml", 'r') as rf:
        cfg = yaml.safe_load(rf)
    net = FasterRCNN(**{**cfg['model'], 'pretrained': False})
    net.load_state_dict(torch.load(weight_path, map_location="cpu")['ema'])
    net.to(device)
    net.eval()
    data_cfg = cfg['data']
    basic_transform = RandScaleMinMax(min_threshes=[640],
                                      max_thresh=data_cfg['max_thresh'])
    coco = COCO(data_cfg['val_annotation_path'])
    coco_predict_list = list()
    time_logger = AverageLogger()
    pbar = tqdm(coco.imgs.keys())
    for img_id in pbar:
        file_name = coco.imgs[img_id]['file_name']
        img_path = os.path.join(data_cfg['val_img_root'], file_name)
        img = cv.imread(img_path)
        # cv.imread signals failure by returning None instead of raising.
        if img is None:
            raise FileNotFoundError(
                "failed to read image: {:s}".format(img_path))
        h, w, _ = img.shape
        img, ratio = basic_transform.scale_img(img, min_thresh=640)
        h_, w_ = img.shape[:2]
        # Square canvas filled with the border colour; the scaled image is
        # placed at the top-left, so no offset has to be undone later.
        padding_size = max(h_, w_)
        img_inp = np.ones((padding_size, padding_size, 3)) * np.array(
            (103, 116, 123))
        img_inp[:h_, :w_, :] = img
        # BGR -> RGB, scale to [0, 1], then normalise with rgb_mean / rgb_std.
        img_inp = (img_inp[:, :, ::-1] / 255.0 -
                   np.array(rgb_mean)) / np.array(rgb_std)
        # HWC -> NCHW with a leading batch dimension of 1.
        img_inp = torch.from_numpy(img_inp).unsqueeze(0).permute(
            0, 3, 1, 2).contiguous().float().to(device)
        # NOTE(review): CUDA kernels launch asynchronously, so without a
        # torch.cuda.synchronize() this wall-clock timing may under-report
        # the real inference latency -- confirm whether that matters here.
        tic = time.time()
        # Inference only: no autograd graph is needed.
        with torch.no_grad():
            predict = net(img_inp,
                          valid_size=[(padding_size, padding_size)])[0]
        duration = time.time() - tic
        time_logger.update(duration)
        pbar.set_description("fps:{:4.2f}".format(1 / time_logger.avg()))
        if predict is None:
            continue
        # Undo the resize ratio and clip to the original image extent.
        predict[:, [0, 2]] = (predict[:, [0, 2]] / ratio).clamp(min=0, max=w)
        predict[:, [1, 3]] = (predict[:, [1, 3]] / ratio).clamp(min=0, max=h)
        box = predict.cpu().numpy()
        # coco_box is a view of box: columns 2:4 become width/height in
        # place; columns 4 (score) and 5 (class index) are untouched.
        coco_box = box[:, :4]
        coco_box[:, 2:] = coco_box[:, 2:] - coco_box[:, :2]
        for p, b in zip(box.tolist(), coco_box.tolist()):
            coco_predict_list.append({
                'image_id': img_id,
                'category_id': coco_ids[int(p[5])],
                'bbox': [round(x, 3) for x in b],
                'score': round(p[4], 5)
            })
    with open("predicts.json", 'w') as file:
        json.dump(coco_predict_list, file)
    coco_eavl(anno_path=data_cfg['val_annotation_path'],
              pred_path="predicts.json")
示例#3
0
 def __init__(self, cfg_path):
     """Build everything needed for distributed mixed-precision YOLO training.

     Reads the YAML config at ``cfg_path``, initialises the NCCL process
     group, and creates the train/val datasets and loaders, the model
     (YOLOv4 or YOLOv5 depending on ``cfg['model_name']``), the optimizer,
     the gradient scaler, the DDP wrapper, the EMA tracker, the learning
     rate adjuster and the metric loggers.

     Args:
         cfg_path: Path to a YAML file with ``data``, ``model``, ``optim``,
             ``val``, ``gpus`` and ``model_name`` entries.

     Raises:
         NotImplementedError: If ``cfg['model_name']`` is neither ``"v4"``
             nor ``"v5"``.
     """
     with open(cfg_path, 'r') as rf:
         self.cfg = yaml.safe_load(rf)
     self.data_cfg = self.cfg['data']
     self.model_cfg = self.cfg['model']
     self.optim_cfg = self.cfg['optim']
     self.val_cfg = self.cfg['val']
     print(self.data_cfg)
     print(self.model_cfg)
     print(self.optim_cfg)
     print(self.val_cfg)
     # YAML parses a single id such as ``gpus: 0`` as an int, but os.environ
     # only accepts strings, so normalise once and reuse the string.
     gpus = str(self.cfg['gpus'])
     os.environ['CUDA_VISIBLE_DEVICES'] = gpus
     self.gpu_num = len(gpus.split(","))
     dist.init_process_group(backend='nccl')
     self.tdata = CustomerDataSets(
         json_path=self.data_cfg['train_json_path'],
         debug=self.data_cfg['debug'],
         augment=True,
     )
     self.tloader = DataLoader(
         dataset=self.tdata,
         batch_size=self.data_cfg['batch_size'],
         num_workers=self.data_cfg['num_workers'],
         collate_fn=self.tdata.collate_fn,
         sampler=DistributedSampler(dataset=self.tdata, shuffle=True))
     self.vdata = CustomerDataSets(
         json_path=self.data_cfg['val_json_path'],
         debug=self.data_cfg['debug'],
         augment=False,
     )
     self.vloader = DataLoader(
         dataset=self.vdata,
         batch_size=self.data_cfg['batch_size'],
         num_workers=self.data_cfg['num_workers'],
         collate_fn=self.vdata.collate_fn,
         sampler=DistributedSampler(dataset=self.vdata, shuffle=False))
     print("train_data: ", len(self.tdata), " | ",
           "val_data: ", len(self.vdata))
     print("train_iter: ", len(self.tloader), " | ",
           "val_iter: ", len(self.vloader))
     if self.cfg['model_name'] == "v4":
         net = YOLOv4
     elif self.cfg['model_name'] == "v5":
         net = YOLOv5
     else:
         raise NotImplementedError("{:s} not supported yet".format(self.cfg['model_name']))
     model = net(num_cls=self.model_cfg['num_cls'],
                 anchors=self.model_cfg['anchors'],
                 strides=self.model_cfg['strides'],
                 scale_name=self.model_cfg['scale_name'],
                 )
     self.best_map = 0.
     optimizer = split_optimizer(model, self.optim_cfg)
     # NOTE(review): dist.get_rank() is the global rank; using it as the
     # CUDA device index presumably assumes single-node training -- confirm.
     local_rank = dist.get_rank()
     self.local_rank = local_rank
     self.device = torch.device("cuda", local_rank)
     model.to(self.device)
     self.scaler = amp.GradScaler(enabled=True)
     if self.optim_cfg['sync_bn']:
         model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
     self.model = nn.parallel.distributed.DistributedDataParallel(
         model,
         device_ids=[local_rank],
         output_device=local_rank)
     self.optimizer = optimizer
     self.ema = ModelEMA(self.model)
     self.lr_adjuster = IterWarmUpCosineDecayMultiStepLRAdjust(
         init_lr=self.optim_cfg['lr'],
         warm_up_epoch=self.optim_cfg['warm_up_epoch'],
         iter_per_epoch=len(self.tloader),
         epochs=self.optim_cfg['epochs'],
         alpha=self.optim_cfg['alpha'],
         gamma=self.optim_cfg['gamma'],
         bias_idx=2,
         milestones=self.optim_cfg['milestones']
     )
     self.obj_logger = AverageLogger()
     self.iou_logger = AverageLogger()
     self.loss_logger = AverageLogger()
     self.map_logger = AverageLogger()
示例#4
0
class DDPMixSolver(object):
    """Distributed, mixed-precision training/validation driver for YOLOv4/v5.

    ``__init__`` builds all state from a YAML config; ``run`` alternates
    ``train`` and (every ``val_cfg['interval']`` epochs) ``val``.
    """

    def __init__(self, cfg_path):
        """Build datasets, loaders, model, optimizer, EMA and loggers.

        Args:
            cfg_path: Path to a YAML file with ``data``, ``model``,
                ``optim``, ``val``, ``gpus`` and ``model_name`` entries.

        Raises:
            NotImplementedError: If ``cfg['model_name']`` is neither
                ``"v4"`` nor ``"v5"``.
        """
        with open(cfg_path, 'r') as rf:
            self.cfg = yaml.safe_load(rf)
        self.data_cfg = self.cfg['data']
        self.model_cfg = self.cfg['model']
        self.optim_cfg = self.cfg['optim']
        self.val_cfg = self.cfg['val']
        print(self.data_cfg)
        print(self.model_cfg)
        print(self.optim_cfg)
        print(self.val_cfg)
        # YAML parses a single id such as ``gpus: 0`` as an int, but
        # os.environ only accepts strings, so normalise once and reuse it.
        gpus = str(self.cfg['gpus'])
        os.environ['CUDA_VISIBLE_DEVICES'] = gpus
        self.gpu_num = len(gpus.split(","))
        dist.init_process_group(backend='nccl')
        self.tdata = CustomerDataSets(json_path=self.data_cfg['train_json_path'],
                                      debug=self.data_cfg['debug'],
                                      augment=True,
                                      )
        self.tloader = DataLoader(dataset=self.tdata,
                                  batch_size=self.data_cfg['batch_size'],
                                  num_workers=self.data_cfg['num_workers'],
                                  collate_fn=self.tdata.collate_fn,
                                  sampler=DistributedSampler(dataset=self.tdata, shuffle=True))
        self.vdata = CustomerDataSets(json_path=self.data_cfg['val_json_path'],
                                      debug=self.data_cfg['debug'],
                                      augment=False,
                                      )
        self.vloader = DataLoader(dataset=self.vdata,
                                  batch_size=self.data_cfg['batch_size'],
                                  num_workers=self.data_cfg['num_workers'],
                                  collate_fn=self.vdata.collate_fn,
                                  sampler=DistributedSampler(dataset=self.vdata, shuffle=False))
        print("train_data: ", len(self.tdata), " | ",
              "val_data: ", len(self.vdata))
        print("train_iter: ", len(self.tloader), " | ",
              "val_iter: ", len(self.vloader))
        if self.cfg['model_name'] == "v4":
            net = YOLOv4
        elif self.cfg['model_name'] == "v5":
            net = YOLOv5
        else:
            raise NotImplementedError("{:s} not supported yet".format(self.cfg['model_name']))
        model = net(num_cls=self.model_cfg['num_cls'],
                    anchors=self.model_cfg['anchors'],
                    strides=self.model_cfg['strides'],
                    scale_name=self.model_cfg['scale_name'],
                    )
        self.best_map = 0.
        optimizer = split_optimizer(model, self.optim_cfg)
        # NOTE(review): dist.get_rank() is the global rank; using it as the
        # CUDA device index presumably assumes single-node training -- confirm.
        local_rank = dist.get_rank()
        self.local_rank = local_rank
        self.device = torch.device("cuda", local_rank)
        model.to(self.device)
        self.scaler = amp.GradScaler(enabled=True)
        if self.optim_cfg['sync_bn']:
            model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
        self.model = nn.parallel.distributed.DistributedDataParallel(model,
                                                                     device_ids=[local_rank],
                                                                     output_device=local_rank)
        self.optimizer = optimizer
        self.ema = ModelEMA(self.model)
        self.lr_adjuster = IterWarmUpCosineDecayMultiStepLRAdjust(init_lr=self.optim_cfg['lr'],
                                                                  warm_up_epoch=self.optim_cfg['warm_up_epoch'],
                                                                  iter_per_epoch=len(self.tloader),
                                                                  epochs=self.optim_cfg['epochs'],
                                                                  alpha=self.optim_cfg['alpha'],
                                                                  gamma=self.optim_cfg['gamma'],
                                                                  bias_idx=2,
                                                                  milestones=self.optim_cfg['milestones']
                                                                  )
        self.obj_logger = AverageLogger()
        self.iou_logger = AverageLogger()
        self.loss_logger = AverageLogger()
        self.map_logger = AverageLogger()

    def train(self, epoch):
        """Run one training epoch over ``self.tloader``.

        Args:
            epoch: Zero-based epoch index; passed to the LR adjuster and
                shown (plus one) in the progress output.
        """
        self.obj_logger.reset()
        self.iou_logger.reset()
        self.loss_logger.reset()
        self.model.train()
        # Only rank 0 shows a progress bar; other ranks iterate silently.
        if self.local_rank == 0:
            pbar = tqdm(self.tloader)
        else:
            pbar = self.tloader
        for i, (img_tensor, targets_tensor) in enumerate(pbar):
            with torch.no_grad():
                # Multi-scale training: resize the batch to a randomly
                # chosen size when more than two sizes are configured.
                if len(self.data_cfg['multi_scale']) > 2:
                    target_size = np.random.choice(self.data_cfg['multi_scale'])
                    img_tensor = interpolate(img_tensor, mode='bilinear', size=target_size, align_corners=False)
                _, _, h, w = img_tensor.shape
                img_tensor = img_tensor.to(self.device)
                targets_tensor = targets_tensor.to(self.device)
            self.optimizer.zero_grad()
            with amp.autocast(enabled=True):
                ret = self.model(img_tensor, targets_tensor)
                obj_loss = ret['obj_loss']
                iou_loss = ret['iou_loss']
                loss = obj_loss + iou_loss
            self.scaler.scale(loss).backward()
            # Update per-group learning rates before the optimizer step.
            self.lr_adjuster(self.optimizer, i, epoch)
            ulr = self.optimizer.param_groups[0]['lr']
            dlr = self.optimizer.param_groups[2]['lr']
            self.scaler.step(self.optimizer)
            self.scaler.update()
            self.ema.update(self.model)
            self.obj_logger.update(obj_loss.item())
            self.iou_logger.update(iou_loss.item())
            self.loss_logger.update(loss.item())
            if self.local_rank == 0:
                pbar.set_description(
                    "epoch:{:2d}|size:{:3d}|loss:{:6.4f}|obj_loss:{:6.4f}|iou_loss:{:6.4f}|ulr:{:8.6f},dlr:{:8.6f}".format(
                        epoch + 1,
                        h,
                        self.loss_logger.avg(),
                        obj_loss.item(),
                        iou_loss.item(),
                        ulr,
                        dlr
                    ))
        self.ema.update_attr(self.model)
        # Printed by every rank, so each process reports its own averages.
        print(
            "epoch:{:3d}|local:{:3d}|loss:{:6.4f}||obj_loss:{:6.4f}|iou_loss:{:6.4f}".format(epoch + 1,
                                                                                             self.local_rank,
                                                                                             self.loss_logger.avg(),
                                                                                             self.obj_logger.avg(),
                                                                                             self.iou_logger.avg(),
                                                                                             )
        )

    @torch.no_grad()
    def val(self, epoch):
        """Evaluate the EMA model on ``self.vloader`` and save checkpoints.

        Metrics from ``coco_map`` are summed across processes with
        ``reduce_sum`` and divided by ``self.gpu_num``. Rank 0 prints them,
        always writes the "last" checkpoint, and writes the "best_map"
        checkpoint when the mAP improves on ``self.best_map``.

        Args:
            epoch: Zero-based epoch index, stored in the checkpoint.
        """
        self.model.eval()
        self.ema.ema.eval()
        predict_list = list()
        target_list = list()
        if self.local_rank == 0:
            pbar = tqdm(self.vloader)
        else:
            pbar = self.vloader
        for img_tensor, targets_tensor in pbar:
            _, _, h, w = img_tensor.shape
            # Scale columns 1:5 by the image size, then convert them from
            # (centre x, centre y, width, height) to (x1, y1, x2, y2).
            # Column 0 is used below as the sample index within the batch.
            targets_tensor[:, 1:] = targets_tensor[:, 1:] * torch.tensor(data=[w, h, w, h])
            targets_tensor[:, [1, 2]] = targets_tensor[:, [1, 2]] - targets_tensor[:, [3, 4]] * 0.5
            targets_tensor[:, [3, 4]] = targets_tensor[:, [1, 2]] + targets_tensor[:, [3, 4]]
            img_tensor = img_tensor.to(self.device)
            targets_tensor = targets_tensor.to(self.device)
            predicts = self.ema.ema(img_tensor)['predicts']
            for i, pred in enumerate(predicts):
                if pred is not None:
                    # Append an all-zero column to each prediction row.
                    pred = torch.cat([pred, torch.zeros_like(pred[..., [0]])], dim=-1)
                predict_list.append(pred)
                targets_sample = targets_tensor[targets_tensor[:, 0] == i][:, 1:]
                # Prepend an all-zero column to each target row.
                targets_sample = torch.cat([torch.zeros_like(targets_sample[..., [0]]), targets_sample], dim=-1)
                target_list.append(targets_sample)
        # ``mean_ap`` rather than ``map`` so the builtin is not shadowed.
        mp, mr, map50, mean_ap = coco_map(predict_list, target_list)
        mp = reduce_sum(torch.tensor(mp, device=self.device)).item() / self.gpu_num
        mr = reduce_sum(torch.tensor(mr, device=self.device)).item() / self.gpu_num
        map50 = reduce_sum(torch.tensor(map50, device=self.device)).item() / self.gpu_num
        mean_ap = reduce_sum(torch.tensor(mean_ap, device=self.device)).item() / self.gpu_num
        if self.local_rank == 0:
            print("epoch: {:2d}|gpu_num:{:d}|mp:{:6.4f}|mr:{:6.4f}|map50:{:6.4f}|map:{:6.4f}"
                  .format(epoch + 1,
                          self.gpu_num,
                          mp * 100,
                          mr * 100,
                          map50 * 100,
                          mean_ap * 100))
        last_weight_path = os.path.join(self.val_cfg['weight_path'],
                                        "{:s}_{:s}_last.pth"
                                        .format(self.cfg['model_name'], self.model_cfg['scale_name']))
        best_map_weight_path = os.path.join(self.val_cfg['weight_path'],
                                            "{:s}_{:s}_best_map.pth"
                                            .format(self.cfg['model_name'], self.model_cfg['scale_name']))
        ema_static = self.ema.ema.state_dict()
        cpkt = {
            "ema": ema_static,
            "map": mean_ap * 100,
            "epoch": epoch,
        }
        # Only rank 0 writes checkpoints to disk.
        if self.local_rank != 0:
            return
        torch.save(cpkt, last_weight_path)
        if mean_ap > self.best_map:
            torch.save(cpkt, best_map_weight_path)
            self.best_map = mean_ap

    def run(self):
        """Train for ``optim_cfg['epochs']`` epochs, validating periodically."""
        for epoch in range(self.optim_cfg['epochs']):
            self.train(epoch)
            if (epoch + 1) % self.val_cfg['interval'] == 0:
                self.val(epoch)
        dist.destroy_process_group()
        torch.cuda.empty_cache()
示例#5
0
def visualize_model(weight_path="weights/solov2_resnet50_last.pth", device="cuda:0"):
    """Run a SOLOv2 checkpoint over the COCO validation images and score masks.

    The model config is read from ``config/solov2.yaml`` and the ``'ema'``
    entry of the checkpoint is loaded. Predicted masks are resized to the
    original image size, RLE-encoded and passed in memory to ``coco_eavl``
    with ``type="segm"``.

    Despite its name, this function currently only evaluates; it does not
    draw or save any images.

    Args:
        weight_path: Path to a checkpoint whose ``'ema'`` key holds the
            state dict to load.
        device: Device string passed to ``torch.device``.

    Raises:
        FileNotFoundError: If a validation image cannot be read by OpenCV.
    """
    from pycocotools.coco import COCO
    device = torch.device(device)
    with open("config/solov2.yaml", 'r') as rf:
        cfg = yaml.safe_load(rf)
    # Extra model overrides can be merged in here,
    # e.g. "box_score_thresh": 0.8
    net = SOLOv2(**{**cfg['model'], 'pretrained': False, })
    net.load_state_dict(torch.load(weight_path, map_location="cpu")['ema'])
    net.to(device)
    net.eval()
    data_cfg = cfg['data']
    basic_transform = RandScaleMinMax(min_threshes=[640], max_thresh=data_cfg['max_thresh'])
    coco = COCO(data_cfg['val_annotation_path'])
    coco_predict_list = list()
    time_logger = AverageLogger()
    pbar = tqdm(coco.imgs.keys())
    for img_id in pbar:
        file_name = coco.imgs[img_id]['file_name']
        img_path = os.path.join(data_cfg['val_img_root'], file_name)
        img = cv.imread(img_path)
        # cv.imread signals failure by returning None instead of raising.
        if img is None:
            raise FileNotFoundError(
                "failed to read image: {:s}".format(img_path))
        h, w, _ = img.shape
        img, ratio = basic_transform.scale_img(img,
                                               min_thresh=640)
        h_, w_ = img.shape[:2]
        # Square canvas whose side is derived from the longer image side
        # via make_divisible(..., 64); the image sits at the top-left.
        padding_size = make_divisible(max(h_, w_), 64)
        img_inp = np.ones((padding_size, padding_size, 3)) * np.array((103, 116, 123))
        img_inp[:h_, :w_, :] = img
        # BGR -> RGB, scale to [0, 1], then normalise with rgb_mean / rgb_std.
        img_inp = (img_inp[:, :, ::-1] / 255.0 - np.array(rgb_mean)) / np.array(rgb_std)
        img_inp = torch.from_numpy(img_inp).unsqueeze(0).permute(0, 3, 1, 2).contiguous().float().to(device)
        # NOTE(review): CUDA kernels launch asynchronously, so without a
        # torch.cuda.synchronize() this wall-clock timing may under-report
        # the real inference latency -- confirm whether that matters here.
        tic = time.time()
        # Inference only: no autograd graph is needed.
        with torch.no_grad():
            predict = net(img_inp, valid_size=[(w_, h_)])['predicts'][0]
        duration = time.time() - tic
        time_logger.update(duration)
        pbar.set_description("fps:{:4.2f}".format(1 / time_logger.avg()))
        box, mask = predict
        if len(box) == 0:
            continue
        box = box.cpu().numpy()
        mask = (mask.cpu().numpy()).astype(np.uint8)
        # Ensure a leading instance axis before moving it last for cv.resize.
        if len(mask.shape) == 2:
            mask = mask[None, ...]
        mask = mask.transpose(1, 2, 0)
        mask = cv.resize(mask, dsize=(w, h))
        # The result can be 2-D here (the single-mask case); restore the
        # trailing axis before moving the instance axis back to the front.
        if len(mask.shape) == 2:
            mask = mask[..., None]
        mask = mask.transpose(2, 0, 1)
        for p, m in zip(box.tolist(), mask):
            coco_predict_list.append({'image_id': img_id,
                                      'category_id': coco_ids[int(p[5])],
                                      'score': round(p[4], 5),
                                      'segmentation': maskUtils.encode(np.asfortranarray(m))})
    coco_eavl(anno_path=data_cfg['val_annotation_path'], pred_path=coco_predict_list, type="segm")
示例#6
0
 def __init__(self, cfg_path):
     """Build everything needed for distributed SparseRCNN training.

     Reads the YAML config at ``cfg_path``, initialises the NCCL process
     group, and creates the COCO train/val datasets and loaders, the
     model, the optimizer, the DDP wrapper, an optional gradient scaler,
     the EMA tracker, the learning-rate adjuster and the loss loggers.

     Args:
         cfg_path: Path to a YAML file with ``data``, ``model``, ``optim``,
             ``val`` and ``gpus`` entries.
     """
     with open(cfg_path, 'r') as rf:
         self.cfg = yaml.safe_load(rf)
     self.data_cfg = self.cfg['data']
     self.model_cfg = self.cfg['model']
     self.optim_cfg = self.cfg['optim']
     self.val_cfg = self.cfg['val']
     print(self.data_cfg)
     print(self.model_cfg)
     print(self.optim_cfg)
     print(self.val_cfg)
     # YAML parses a single id such as ``gpus: 0`` as an int; both
     # os.environ and .split() need a string, so normalise once.
     gpus = str(self.cfg['gpus'])
     os.environ['CUDA_VISIBLE_DEVICES'] = gpus
     self.gpu_num = len(gpus.split(','))
     dist.init_process_group(backend='nccl')
     self.tdata = COCODataSets(
         img_root=self.data_cfg['train_img_root'],
         annotation_path=self.data_cfg['train_annotation_path'],
         max_thresh=self.data_cfg['max_thresh'],
         debug=self.data_cfg['debug'],
         use_crowd=self.data_cfg['use_crowd'],
         augments=True,
         remove_blank=self.data_cfg['remove_blank'])
     self.tloader = DataLoader(dataset=self.tdata,
                               batch_size=self.data_cfg['batch_size'],
                               num_workers=self.data_cfg['num_workers'],
                               collate_fn=self.tdata.collect_fn,
                               sampler=DistributedSampler(
                                   dataset=self.tdata, shuffle=True))
     # Validation data: no augmentation, and remove_blank is always False.
     self.vdata = COCODataSets(
         img_root=self.data_cfg['val_img_root'],
         annotation_path=self.data_cfg['val_annotation_path'],
         max_thresh=self.data_cfg['max_thresh'],
         debug=self.data_cfg['debug'],
         use_crowd=self.data_cfg['use_crowd'],
         augments=False,
         remove_blank=False)
     self.vloader = DataLoader(dataset=self.vdata,
                               batch_size=self.data_cfg['batch_size'],
                               num_workers=self.data_cfg['num_workers'],
                               collate_fn=self.vdata.collect_fn,
                               sampler=DistributedSampler(
                                   dataset=self.vdata, shuffle=False))
     print("train_data: ", len(self.tdata), " | ", "val_data: ",
           len(self.vdata), " | ", "empty_data: ",
           self.tdata.empty_images_len)
     print("train_iter: ", len(self.tloader), " | ", "val_iter: ",
           len(self.vloader))
     model = SparseRCNN(**self.model_cfg)
     self.best_map = 0.
     optimizer = split_optimizer_v2(model, self.optim_cfg)
     # NOTE(review): dist.get_rank() is the global rank; using it as the
     # CUDA device index presumably assumes single-node training -- confirm.
     local_rank = dist.get_rank()
     self.local_rank = local_rank
     self.device = torch.device("cuda", local_rank)
     model.to(self.device)
     if self.optim_cfg['sync_bn']:
         model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
     self.model = nn.parallel.distributed.DistributedDataParallel(
         model, device_ids=[local_rank], output_device=local_rank)
     # The scaler exists only when AMP is enabled in the optim config.
     self.scaler = amp.GradScaler(
         enabled=True) if self.optim_cfg['amp'] else None
     self.optimizer = optimizer
     self.ema = ModelEMA(self.model)
     self.lr_adjuster = IterWarmUpMultiStepDecay(
         init_lr=self.optim_cfg['lr'],
         milestones=self.optim_cfg['milestones'],
         warm_up_iter=self.optim_cfg['warm_up_iter'],
         iter_per_epoch=len(self.tloader),
         epochs=self.optim_cfg['epochs'],
         alpha=self.optim_cfg['alpha'],
         warm_up_factor=self.optim_cfg['warm_up_factor'])
     self.cls_loss_logger = AverageLogger()
     self.l1_loss_logger = AverageLogger()
     self.iou_loss_logger = AverageLogger()
     self.match_num_logger = AverageLogger()
     self.loss_logger = AverageLogger()
示例#7
0
class DDPMixSolver(object):
    """DistributedDataParallel train/validation driver for ``SparseRCNN``.

    Everything is configured from a YAML file that must provide the
    top-level keys ``data``, ``model``, ``optim``, ``val``, ``gpus`` (a
    comma-separated device string) and ``model_name`` (used to name the
    checkpoint files).  One instance is expected to live in each process of
    the distributed job.
    """

    def __init__(self, cfg_path):
        """Load the config and build data loaders, model, optimizer and EMA.

        Args:
            cfg_path: path of the YAML configuration file.
        """
        with open(cfg_path, 'r') as rf:
            self.cfg = yaml.safe_load(rf)
        self.data_cfg = self.cfg['data']
        self.model_cfg = self.cfg['model']
        self.optim_cfg = self.cfg['optim']
        self.val_cfg = self.cfg['val']
        print(self.data_cfg)
        print(self.model_cfg)
        print(self.optim_cfg)
        print(self.val_cfg)
        os.environ['CUDA_VISIBLE_DEVICES'] = self.cfg['gpus']
        # Number of processes is taken from the configured GPU list; it is
        # used as the divisor when averaging metrics across ranks.
        self.gpu_num = len(self.cfg['gpus'].split(','))
        dist.init_process_group(backend='nccl')
        self.tdata, self.tloader = self._build_loader(
            img_root=self.data_cfg['train_img_root'],
            annotation_path=self.data_cfg['train_annotation_path'],
            augments=True,
            remove_blank=self.data_cfg['remove_blank'],
            shuffle=True)
        self.vdata, self.vloader = self._build_loader(
            img_root=self.data_cfg['val_img_root'],
            annotation_path=self.data_cfg['val_annotation_path'],
            augments=False,
            remove_blank=False,
            shuffle=False)
        print("train_data: ", len(self.tdata), " | ", "val_data: ",
              len(self.vdata), " | ", "empty_data: ",
              self.tdata.empty_images_len)
        print("train_iter: ", len(self.tloader), " | ", "val_iter: ",
              len(self.vloader))
        model = SparseRCNN(**self.model_cfg)
        self.best_map = 0.
        optimizer = split_optimizer_v2(model, self.optim_cfg)
        # NOTE(review): the global rank is used as the CUDA device index,
        # which only matches the local device on a single-node job --
        # confirm against the launch setup.
        local_rank = dist.get_rank()
        self.local_rank = local_rank
        self.device = torch.device("cuda", local_rank)
        model.to(self.device)
        if self.optim_cfg['sync_bn']:
            model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
        self.model = nn.parallel.distributed.DistributedDataParallel(
            model, device_ids=[local_rank], output_device=local_rank)
        # A GradScaler doubles as the "mixed precision enabled" flag.
        self.scaler = amp.GradScaler(
            enabled=True) if self.optim_cfg['amp'] else None
        self.optimizer = optimizer
        self.ema = ModelEMA(self.model)
        self.lr_adjuster = IterWarmUpMultiStepDecay(
            init_lr=self.optim_cfg['lr'],
            milestones=self.optim_cfg['milestones'],
            warm_up_iter=self.optim_cfg['warm_up_iter'],
            iter_per_epoch=len(self.tloader),
            epochs=self.optim_cfg['epochs'],
            alpha=self.optim_cfg['alpha'],
            warm_up_factor=self.optim_cfg['warm_up_factor'])
        self.cls_loss_logger = AverageLogger()
        self.l1_loss_logger = AverageLogger()
        self.iou_loss_logger = AverageLogger()
        self.match_num_logger = AverageLogger()
        self.loss_logger = AverageLogger()

    def _build_loader(self, img_root, annotation_path, augments,
                      remove_blank, shuffle):
        """Create a ``COCODataSets`` and its distributed ``DataLoader``.

        Returns:
            ``(dataset, loader)`` sharing the batch size, worker count and
            dataset options from ``self.data_cfg``.
        """
        dataset = COCODataSets(
            img_root=img_root,
            annotation_path=annotation_path,
            max_thresh=self.data_cfg['max_thresh'],
            debug=self.data_cfg['debug'],
            use_crowd=self.data_cfg['use_crowd'],
            augments=augments,
            remove_blank=remove_blank)
        loader = DataLoader(dataset=dataset,
                            batch_size=self.data_cfg['batch_size'],
                            num_workers=self.data_cfg['num_workers'],
                            collate_fn=dataset.collect_fn,
                            sampler=DistributedSampler(dataset=dataset,
                                                       shuffle=shuffle))
        return dataset, loader

    def _forward_losses(self, img_tensor, targets_tensor, batch_len):
        """Run the model in training mode and collect its loss terms.

        Returns:
            ``(loss, cls_loss, l1_loss, iou_loss, match_num)`` where ``loss``
            is the sum of the three loss terms reported by the model.
        """
        out = self.model(img_tensor,
                         targets={
                             "target": targets_tensor,
                             "batch_len": batch_len
                         })
        cls_loss = out['cls_loss']
        l1_loss = out['l1_loss']
        iou_loss = out['iou_loss']
        loss = cls_loss + l1_loss + iou_loss
        return loss, cls_loss, l1_loss, iou_loss, out['match_num']

    def _rank_mean(self, value):
        """Sum ``value`` over all ranks and divide by ``self.gpu_num``.

        This is a collective operation (it calls ``reduce_sum``), so every
        rank must call it.  Returns a plain Python number.
        """
        total = reduce_sum(torch.tensor(value, device=self.device))
        return total.item() / self.gpu_num

    def _weight_file(self, tag):
        """Return the checkpoint path ``<weight_path>/<model>_<backbone>_<tag>.pth``."""
        return os.path.join(
            self.val_cfg['weight_path'],
            "{:s}_{:s}_{:s}.pth".format(self.cfg['model_name'],
                                        self.model_cfg['backbone'], tag))

    def train(self, epoch):
        """Train for one epoch and print the rank-averaged losses on rank 0.

        Args:
            epoch: zero-based epoch index.
        """
        self.loss_logger.reset()
        self.cls_loss_logger.reset()
        self.l1_loss_logger.reset()
        self.iou_loss_logger.reset()
        self.match_num_logger.reset()
        self.model.train()
        # Without set_epoch the DistributedSampler reuses the same shuffled
        # order in every epoch.
        self.tloader.sampler.set_epoch(epoch)
        pbar = tqdm(self.tloader) if self.local_rank == 0 else self.tloader
        str_template = \
            "epoch:{:2d}|match_num:{:0>4d}|size:{:3d}|loss:{:6.4f}|cls:{:6.4f}|l1:{:6.4f}|iou:{:6.4f}|lr:{:8.6f}"
        for i, (img_tensor, targets_tensor, batch_len) in enumerate(pbar):
            h = img_tensor.shape[2]
            img_tensor = img_tensor.to(self.device)
            targets_tensor = targets_tensor.to(self.device)
            self.optimizer.zero_grad()
            if self.scaler is not None:
                # Only the forward pass and loss computation run under
                # autocast; backward and the optimizer step stay outside.
                with amp.autocast(enabled=True):
                    loss, cls_loss, l1_loss, iou_loss, match_num = \
                        self._forward_losses(img_tensor, targets_tensor,
                                             batch_len)
                self.scaler.scale(loss).backward()
                self.lr_adjuster(self.optimizer, i, epoch)
                self.scaler.step(self.optimizer)
                self.scaler.update()
            else:
                loss, cls_loss, l1_loss, iou_loss, match_num = \
                    self._forward_losses(img_tensor, targets_tensor,
                                         batch_len)
                loss.backward()
                self.lr_adjuster(self.optimizer, i, epoch)
                self.optimizer.step()
            self.ema.update(self.model)
            lr = self.optimizer.param_groups[0]['lr']
            self.loss_logger.update(loss.item())
            self.iou_loss_logger.update(iou_loss.item())
            self.l1_loss_logger.update(l1_loss.item())
            self.cls_loss_logger.update(cls_loss.item())
            self.match_num_logger.update(match_num)
            if self.local_rank == 0:
                pbar.set_description(
                    str_template.format(epoch + 1, int(match_num), h,
                                        self.loss_logger.avg(),
                                        self.cls_loss_logger.avg(),
                                        self.l1_loss_logger.avg(),
                                        self.iou_loss_logger.avg(), lr))
        self.ema.update_attr(self.model)
        # Every rank takes part in the reductions; only rank 0 reports.
        loss_avg = self._rank_mean(self.loss_logger.avg())
        iou_loss_avg = self._rank_mean(self.iou_loss_logger.avg())
        l1_loss_avg = self._rank_mean(self.l1_loss_logger.avg())
        cls_loss_avg = self._rank_mean(self.cls_loss_logger.avg())
        # Per-rank match totals, averaged over the ranks.
        match_num_sum = self._rank_mean(self.match_num_logger.sum())
        if self.local_rank == 0:
            final_template = "epoch:{:2d}|match_num:{:d}|loss:{:6.4f}|cls:{:6.4f}|l1:{:6.4f}|iou:{:6.4f}"
            print(
                final_template.format(epoch + 1, int(match_num_sum), loss_avg,
                                      cls_loss_avg, l1_loss_avg, iou_loss_avg))

    @torch.no_grad()
    def val(self, epoch):
        """Evaluate the EMA model and write checkpoints from rank 0.

        Each rank scores its own shard with ``coco_map``; the four metrics
        are then averaged over the ranks, so the reported numbers are the
        mean of per-shard metrics.  Rank 0 always writes the ``last``
        checkpoint and additionally the ``best_map`` checkpoint when the
        averaged mAP improves on ``self.best_map``.

        Args:
            epoch: zero-based epoch index (stored in the checkpoint).
        """
        predict_list = list()
        target_list = list()
        self.model.eval()
        self.ema.ema.eval()
        pbar = tqdm(self.vloader) if self.local_rank == 0 else self.vloader
        for img_tensor, targets_tensor, batch_len in pbar:
            img_tensor = img_tensor.to(self.device)
            targets_tensor = targets_tensor.to(self.device)
            predicts = self.ema.ema(img_tensor)['predicts']
            # Targets arrive concatenated; split them back per image.
            for pred, target in zip(predicts, targets_tensor.split(batch_len)):
                predict_list.append(pred)
                target_list.append(target)
        mp, mr, map50, mean_ap = coco_map(predict_list, target_list)
        # Collective reductions: must run on every rank before any early
        # return.  Results are plain floats so that best_map and the
        # checkpoint do not hold device tensors.
        mp = self._rank_mean(mp)
        mr = self._rank_mean(mr)
        map50 = self._rank_mean(map50)
        mean_ap = self._rank_mean(mean_ap)

        if self.local_rank != 0:
            return
        print("*" * 20, "eval start", "*" * 20)
        print(
            "epoch: {:2d}|mp:{:6.4f}|mr:{:6.4f}|map50:{:6.4f}|map:{:6.4f}".
            format(epoch + 1, mp * 100, mr * 100, map50 * 100,
                   mean_ap * 100))
        print("*" * 20, "eval end", "*" * 20)
        cpkt = {
            "model": self.model.module.state_dict(),
            "map": mean_ap * 100,
            "epoch": epoch,
            "ema": self.ema.ema.state_dict()
        }
        # Avoid losing a finished epoch to a missing output directory.
        os.makedirs(self.val_cfg['weight_path'], exist_ok=True)
        torch.save(cpkt, self._weight_file("last"))
        if mean_ap > self.best_map:
            torch.save(cpkt, self._weight_file("best_map"))
            self.best_map = mean_ap

    def run(self):
        """Train for ``optim.epochs`` epochs, validating every ``val.interval``."""
        for epoch in range(self.optim_cfg['epochs']):
            self.train(epoch)
            if (epoch + 1) % self.val_cfg['interval'] == 0:
                self.val(epoch)