def main():
    """Parse the testing options from the command line and run the test.

    Enables cuDNN (with benchmark mode), registers the command-line options,
    logs the parsed arguments and passes them to ``Model.test``.
    """
    torch.backends.cudnn.enabled = True
    torch.backends.cudnn.benchmark = True

    # Parse the configurations.
    # Each entry pairs a flag with the keyword arguments for ``add_argument``;
    # entries are registered in this order, so the ``--help`` listing keeps
    # the same layout.
    option_specs = (
        ('--gpu_ids', {
            'type': str,
            'default': '-1',
            'help': 'IDs of GPUs to use (please use `,` to split multiple IDs); -1 means CPU only',
        }),
        ('--tt_list', {
            'type': str,
            'required': True,
            'help': 'Path to the list of testing files',
        }),
        ('--ckpt_dir', {
            'type': str,
            'required': True,
            'help': 'Name of the directory to write log',
        }),
        ('--model_file', {
            'type': str,
            'required': True,
            'help': 'Path to the model file',
        }),
        ('--est_path', {
            'type': str,
            'default': '../data/estimates',
            'help': 'Path to dump estimates',
        }),
        ('--write_ideal', {
            'default': False,
            'action': 'store_true',
            'help': 'Whether to write ideal signals (the speech signals resynthesized from the ideal training targets; ex. for time-domain enhancement, it is the same as clean speech)',
        }),
    )

    parser = argparse.ArgumentParser(
        description='Additioal configurations for testing',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    for flag, spec in option_specs:
        parser.add_argument(flag, **spec)

    args = parser.parse_args()
    logger.info('Arguments in command:\n{}'.format(pprint.pformat(vars(args))))

    Model().test(args)
Exemplo n.º 2
0
def main():
    """Run the completion model over a dataset and save each prediction.

    Reads ``args.checkpoint`` and ``args.data_path`` from ``parse_args()``,
    restores the model weights, runs inference in eval mode without gradients
    and writes one ``pred_<filename>`` array per sample into ``demo_results``
    via ``np.save``.

    :raises ValueError: if ``args.checkpoint`` is not an existing file
    """
    args = parse_args()

    # build model
    model = Model(radius=0.1,
                  bottleneck=1024,
                  num_pts=2048,
                  num_pts_observed=2048,
                  num_vote_train=64,
                  num_contrib_vote_train=10,
                  num_vote_test=128,
                  is_vote=True,
                  task='completion')

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)
    if torch.cuda.device_count() > 1:
        print("Let's use", torch.cuda.device_count(), "GPUs!")
    # NOTE(review): the wrapper is applied unconditionally, also on a single
    # device -- presumably so the state-dict keys match a checkpoint saved
    # from a DataParallel model; confirm against the training script.
    model = torch.nn.DataParallel(model)

    # load model
    model_path = args.checkpoint
    if not os.path.isfile(model_path):
        raise ValueError(
            '{} does not exist. Please provide a valid path for pretrained model!'
            .format(model_path))
    # map_location remaps the stored tensors onto the device selected above,
    # so a checkpoint saved on GPU still loads on the CPU fallback.
    model.load_state_dict(torch.load(model_path, map_location=device))
    print('Load model successfully from: {}'.format(args.checkpoint))

    save_dir = 'demo_results'
    # exist_ok avoids the check-then-create race of exists() + makedirs()
    os.makedirs(save_dir, exist_ok=True)

    dataset = load_data(args.data_path)

    model.eval()
    with torch.no_grad():
        for data in dataset:
            pos, batch, filename = data
            pos, batch = pos.to(device), batch.to(device)

            pred, _ = model(None, pos, batch)

            # keep only the first element along the leading axis of the output
            pred = pred.cpu().detach().numpy()[0]
            np.save(os.path.join(save_dir, 'pred_{}'.format(filename)), pred)

    print('Done.')
Exemplo n.º 3
0
def test(cfg,
         data,
         weights=None,
         batch_size=16,
         img_size=608,
         iou_thres=0.5,
         conf_thres=0.001,
         nms_thres=0.5,
         save_json=True,
         hyp=None,
         model=None,
         single_cls=False):
    """Evaluate a detection model on the validation split named in ``data``.

    :param cfg: model configuration, passed to ``Model`` when ``model`` is None
    :param data: mapping providing the ``'valid'``, ``'num_classes'`` and
        ``'names'`` entries
    :param str weights: weights path; a ``.pt`` file is loaded as a checkpoint
        with a ``'model'`` entry, any other non-empty path is passed to
        ``load_darknet_weights``; ``None`` or ``''`` loads nothing
    :param int batch_size: batch size of the evaluation dataloader
    :param int img_size: image size passed to the model and the dataset
    :param float iou_thres: IoU above which a prediction counts as correct
    :param float conf_thres: confidence threshold passed to NMS
    :param float nms_thres: NMS threshold
    :param bool save_json: whether to collect detections as COCO-style records
        and, if any were produced, evaluate them with pycocotools
    :param hyp: hyperparameters forwarded to ``LoadImagesAndLabels``
    :param model: already-built model to evaluate; when None a model is built
        from ``cfg`` and ``weights`` on the device named by the module-level
        ``opt.device``
    :param bool single_cls: treat the data as a single class named ``'item'``
    :return: ``((mean_pre, mean_rec, mean_ap, mf1, *mean_losses), maps)`` where
        ``maps`` holds one AP value per class
    """

    if model is None:
        device = select_device(opt.device)
        verbose = False
        # Initialize model
        model = Model(cfg, img_size).to(device)
        # Load weights. The default ``weights=None`` is treated as "nothing to
        # load" instead of failing on ``None.endswith``.
        weights = weights or ''
        if weights.endswith('.pt'):
            checkpoint = torch.load(weights, map_location=device)
            state_dict = intersect_dicts(checkpoint['model'],
                                         model.state_dict())
            model.load_state_dict(state_dict, strict=False)
        elif len(weights) > 0:
            load_darknet_weights(model, weights)
        if weights:
            print(f'Loaded weights from {weights}!')

        if torch.cuda.device_count() > 1:
            model = nn.DataParallel(model)
    else:
        device = next(model.parameters()).device
        verbose = False

    test_path = data['valid']
    num_classes, names = (1, ['item']) if single_cls else (int(
        data['num_classes']), data['names'])

    # Dataloader
    dataset = LoadImagesAndLabels(test_path, img_size, batch_size, hyp=hyp)
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=batch_size,
                                             num_workers=8,
                                             pin_memory=True,
                                             collate_fn=dataset.collate_fn)

    seen = 0
    model.eval()
    coco91class = coco80_to_coco91_class()
    output_format = ('%20s' + '%10s' * 6) % ('Class', 'Images', 'Targets',
                                             'Pre', 'Rec', 'mAP', 'F1')
    precision, recall, f_1, mean_pre, mean_rec, mean_ap, mf1 = 0., 0., 0., 0., 0., 0., 0.
    loss = torch.zeros(3)
    json_dict, stats, aver_pre, ap_class = [], [], [], []
    for batch_i, (imgs, targets, paths,
                  shapes) in enumerate(tqdm(dataloader, desc=output_format)):
        targets = targets.to(device)
        imgs = imgs.to(device) / 255.0
        _, _, height, width = imgs.shape  # batch size, channels, height, width

        # Plot images with bounding boxes
        if batch_i == 0 and not os.path.exists('test_batch0.jpg'):
            plot_images(imgs=imgs,
                        targets=targets,
                        paths=paths,
                        fname='test_batch0.jpg')

        with torch.no_grad():
            inference_output, train_output = model(imgs)

            if hasattr(model, 'hyp'):  # if model has loss hyperparameters
                loss += compute_loss(train_output, targets,
                                     model)[1][:3].cpu()  # GIoU, obj, cls

            output = non_max_suppression(inference_output,
                                         conf_thres=conf_thres,
                                         nms_thres=nms_thres)

        # Statistics per image
        for i, pred in enumerate(output):
            # column 0 of targets is the image index within the batch
            labels = targets[targets[:, 0] == i, 1:]
            num_labels = len(labels)
            target_class = labels[:, 0].tolist() if num_labels else []
            seen += 1

            if pred is None:
                # no detections: only record the image if it had targets
                if num_labels:
                    stats.append(
                        ([], torch.Tensor(), torch.Tensor(), target_class))
                continue

            # Append to pycocotools JSON dictionary
            if save_json:
                # [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}, ...
                image_id = int(Path(paths[i]).stem.split('_')[-1])
                box = pred[:, :4].clone()  # xyxy
                scale_coords(imgs[i].shape[1:], box,
                             shapes[i][0])  # to original shape
                box = xyxy2xywh(box)  # xywh
                box[:, :2] -= box[:, 2:] / 2  # xy center to top-left corner
                for det_i, det in enumerate(pred):
                    json_dict.append({
                        'image_id':
                        image_id,
                        'category_id':
                        coco91class[int(det[6])],
                        'bbox':
                        [float(format(x, '.%gf' % 3)) for x in box[det_i]],
                        'score':
                        float(format(det[4], '.%gf' % 5))
                    })

            # Clip boxes to image bounds
            clip_coords(pred, (height, width))

            # Assign all predictions as incorrect
            correct = [0] * len(pred)
            if num_labels:
                detected = []
                tcls_tensor = labels[:, 0]

                # target boxes
                tbox = xywh2xyxy(labels[:, 1:5])
                tbox[:, [0, 2]] *= width
                tbox[:, [1, 3]] *= height

                # Search for correct predictions
                for j, (*pbox, _, _, pcls) in enumerate(pred):

                    # Break if all targets already located in image
                    if len(detected) == num_labels:
                        break

                    # Continue if predicted class not among image classes
                    if pcls.item() not in target_class:
                        continue

                    # Best iou, index between pred and targets
                    mask = (pcls == tcls_tensor).nonzero(
                        as_tuple=False).view(-1)
                    iou, best_iou = bbox_iou(pbox, tbox[mask]).max(0)

                    # If iou > threshold and class is correct mark as correct
                    if iou > iou_thres and mask[
                            best_iou] not in detected:  # and pcls == target_class[bi]:
                        correct[j] = 1
                        detected.append(mask[best_iou])

            # Append statistics (correct, conf, pcls, target_class)
            stats.append(
                (correct, pred[:, 4].cpu(), pred[:, 6].cpu(), target_class))

    # Compute statistics
    stats = [np.concatenate(x, 0) for x in list(zip(*stats))]
    if len(stats):
        precision, recall, aver_pre, f_1, ap_class = ap_per_class(*stats)
        mean_pre, mean_rec, mean_ap, mf1 = precision.mean(), recall.mean(
        ), aver_pre.mean(), f_1.mean()
        num_targets = np.bincount(
            stats[3].astype(np.int64),
            minlength=num_classes)  # number of targets per class
    else:
        num_targets = torch.zeros(1)

    # Print results
    print_format = '%20s' + '%10.3g' * 6
    print(print_format %
          ('all', seen, num_targets.sum(), mean_pre, mean_rec, mean_ap, mf1))

    # Print results per class
    if verbose and num_classes > 1 and stats:
        for i, class_ in enumerate(ap_class):
            print(print_format %
                  (names[class_], seen, num_targets[class_], precision[i],
                   recall[i], aver_pre[i], f_1[i]))

    # Save JSON
    if save_json and mean_ap and json_dict:
        try:
            img_ids = [
                int(Path(x).stem.split('_')[-1]) for x in dataset.img_files
            ]
            with open('results.json', 'w') as file:
                json.dump(json_dict, file)

            # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb
            cocogt = COCO('data/coco/annotations/instances_val2017.json'
                          )  # initialize COCO ground truth api
            cocodt = cocogt.loadRes('results.json')  # initialize COCO pred api

            cocoeval = COCOeval(cocogt, cocodt, 'bbox')
            cocoeval.params.imgIds = img_ids  # [:32]  # only evaluate these images
            cocoeval.evaluate()
            cocoeval.accumulate()
            cocoeval.summarize()
            mean_ap = cocoeval.stats[1]  # update mAP to pycocotools mAP
        except ImportError:
            print(
                'WARNING: missing dependency pycocotools from requirements.txt. Can not compute official COCO mAP.'
            )

    # Return results
    # every class starts at the mean AP; classes with their own AP overwrite it
    maps = np.zeros(num_classes) + mean_ap
    for i, class_ in enumerate(ap_class):
        maps[class_] = aver_pre[i]
    return (mean_pre, mean_rec, mean_ap, mf1,
            *(loss / len(dataloader)).tolist()), maps
Exemplo n.º 4
0
    parser.add_argument(
        "--checkpoint",
        type=str,
        help="directory which contains pretrained model (.pth)")

    args = parser.parse_args()
    assert args.task in ['completion', 'classification', 'segmentation']

    # construct data loader
    train_dataloader, test_dataloader = load_dataset(args)

    model = Model(radius=args.radius,
                  bottleneck=args.bottleneck,
                  num_pts=args.num_pts,
                  num_pts_observed=args.num_pts_observed,
                  num_vote_train=args.num_vote_train,
                  num_contrib_vote_train=args.num_contrib_vote_train,
                  num_vote_test=args.num_vote_test,
                  is_vote=args.is_vote,
                  task=args.task)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)
    if torch.cuda.device_count() > 1:
        print("Let's use", torch.cuda.device_count(), "GPUs!")
    model = torch.nn.DataParallel(model)

    # evaluation
    if args.eval:
        model_path = os.path.join(args.checkpoint)
        if not os.path.isfile(model_path):
    def fit(self,
            x_train,
            y_train,
            x_test,
            y_test,
            trainable=True,
            custom=True,
            batch_size=32,
            epochs=5):
        """Fit the feature extraction and label encoder, then train the model.

        The labels of ``y_train`` (in order of first appearance) and the least
        frequent label are stored on the instance and pickled into
        ``self.model_folder``. The wrapped ``Model`` is then fitted
        ``self.nb_dataset`` times, each time on data produced by
        ``generate_data``, before the weights and (for the ``'F1'`` cutoff)
        the optimal cutoff are computed on the test data.

        :param x_train: training inputs
        :param y_train: training labels
        :param x_test: test inputs
        :param y_test: test labels
        :param trainable: forwarded to ``create_name_fig`` and ``Model``
        :param custom: forwarded to ``create_name_fig``; with the
            ``"embedding"`` encoding it selects fitting on ``x_train`` only
            and storing the returned embedding matrix
        :param batch_size: batch size forwarded to ``Model.fit``
        :param epochs: number of epochs forwarded to ``Model.fit``
        :return: None
        """
        self.create_name_fig(trainable, custom)
        self.new_model()

        # Distinct labels, index of their first occurrence, and their counts.
        classes, first_seen, frequencies = np.unique(y_train,
                                                     return_counts=True,
                                                     return_index=True)
        self.min_label = classes[np.argmin(frequencies)]
        self.label = [y_train[position] for position in sorted(first_seen)]
        for pickle_name, payload in (("label.pickle", self.label),
                                     ("min_label.pickle", self.min_label)):
            save_object(os.path.join(self.model_folder, pickle_name), payload)

        self.feature_extraction = PreprocessingTemplate(
            self.config_dict, self.feature_extraction_folder)
        corpus = np.hstack((x_train, x_test))
        custom_embedding = (self.config_dict["encoding"] == "embedding"
                            and custom)
        if not custom_embedding:
            self.feature_extraction.fit_transform(corpus)
        else:
            _, self.embedding_matrix = self.feature_extraction.fit_transform(
                x_train)
        x_train = self.feature_extraction.transform(x_train)
        x_test = self.feature_extraction.transform(x_test)

        self.encoder = OneHotEncoding(self.feature_extraction_folder)
        self.encoder.fit(self.label)
        y_test = self.encoder.transform(y_test)

        self.n_out = len(classes)

        # Architecture parameters: one set for 'LSTM', one for any other type.
        lstm_params = {
            'units_size1': 256,
            'units_sizes': [128],
            'dense_size1': 500,
            'dense_size2': [300],
            'dropout': 0.004
        }
        other_params = {
            'filter_sizes': [1, 2],
            'nb_filter': 1024,
            'dense_size1': 400,
            'dense_size2': [250],
            'dropout': 0.001
        }
        params = lstm_params if self.type_model == 'LSTM' else other_params

        model = Model(self.type_model,
                      vocabulary_size=len(self.feature_extraction.vocabulary),
                      embedding_dim=self.config_dict['embedding_dim'],
                      embedding_matrix=self.embedding_matrix,
                      trainable=trainable,
                      params=params,
                      filename=self.model_folder)

        # One training run per generated dataset; the run index is passed on.
        for run_index in range(self.nb_dataset):
            x_generated, y_generated = generate_data(x_train, y_train,
                                                     self.methode,
                                                     self.threshold)
            model.fit(x_train=x_generated,
                      y_train=self.encoder.transform(y_generated),
                      x_test=x_test,
                      y_test=y_test,
                      batch_size=batch_size,
                      epochs=epochs,
                      i=run_index)

        self.calculate_weight(x_test=x_test, y_test=y_test)
        if self.cutoff == 'F1':
            self.find_optimal_cutoff(x_test=x_test, y_test=y_test)
Exemplo n.º 6
0
def main():
    """Parse the training options from the command line and run training.

    Enables cuDNN (with benchmark mode), registers the command-line options,
    logs the parsed arguments and passes them to ``Model.train``.
    """
    torch.backends.cudnn.enabled = True
    torch.backends.cudnn.benchmark = True

    # Sentinel standing in for "no default": such options are mandatory.
    required = object()

    # (flag, type, default or `required`, help) -- registered in this order so
    # the ``--help`` listing keeps the same layout.
    option_table = [
        ('--gpu_ids', str, '-1',
         'IDs of GPUs to use (please use `,` to split multiple IDs); -1 means CPU only'),
        ('--tr_list', str, required,
         'Path to the list of training files'),
        ('--cv_file', str, required,
         'Path to the cross validation file'),
        ('--ckpt_dir', str, required,
         'Name of the directory to dump checkpoint'),
        ('--unit', str, required,
         'Unit of sample, can be either `seg` or `utt`'),
        ('--logging_period', int, 1000,
         'Logging period (also the period of cross validation) represented by the number of iterations'),
        ('--time_log', str, '',
         'Log file for timing batch processing'),
        ('--batch_size', int, 16,
         'Minibatch size'),
        ('--buffer_size', int, 32,
         'Buffer size'),
        ('--segment_size', float, 4.0,
         'Length of segments used for training (seconds)'),
        ('--segment_shift', float, 1.0,
         'Shift of segments used for training (seconds)'),
        ('--lr', float, 0.001,
         'Initial learning rate for training'),
        ('--lr_decay_factor', float, 0.98,
         'Decaying factor of learning rate'),
        ('--lr_decay_period', int, 2,
         'Decaying period of learning rate (epochs)'),
        ('--clip_norm', float, -1.0,
         'Gradient clipping (L2-norm)'),
        ('--max_n_epochs', int, 100,
         'Maximum number of epochs'),
        ('--loss_log', str, 'loss.txt',
         'Filename of the loss log'),
        ('--resume_model', str, '',
         'Existing model to resume training from'),
    ]

    # parse the configurations
    parser = argparse.ArgumentParser(
        description='Additioal configurations for training',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    for flag, kind, default, text in option_table:
        if default is required:
            parser.add_argument(flag, type=kind, required=True, help=text)
        else:
            parser.add_argument(flag, type=kind, default=default, help=text)

    args = parser.parse_args()
    logger.info('Arguments in command:\n{}'.format(pprint.pformat(vars(args))))

    Model().train(args)
Exemplo n.º 7
0
def main():
    """Train the detection model and evaluate it after each epoch.

    Takes no arguments: the options come from ``parse()`` and are stored in
    the module-level ``opt``. The function optionally sets up distributed
    training, knowledge distillation from a teacher model, sparsity training
    of the BN layers and mixed precision, then runs the epoch loop. Ranks
    ``-1`` (single process) and ``0`` write the results file, the Tensorboard
    logs and the ``last`` / ``best`` checkpoints.

    :return: the results tuple of the last evaluation
        (P, R, mAP, F1, val GIoU, val Objectness, val Classification)
    """

    global opt
    opt = parse()

    # Resolve the resume checkpoint BEFORE taking the local copy of the
    # weights path; otherwise ``--resume`` would keep loading the original
    # ``opt.weights`` and never restore ``last``.
    opt.weights = last if opt.resume else opt.weights

    arc = opt.arc
    cfg = opt.cfg
    teacher_cfg = opt.teacher_cfg
    img_size = opt.img_size
    epochs = opt.epochs
    batch_size = opt.batch_size
    accumulate = opt.accumulate  # effective bs = batch_size * accumulate = 16 * 4 = 64
    weights = opt.weights
    teacher_weights = opt.teacher_weights
    multi_scale = opt.multi_scale
    sparsity_training = opt.st

    # Initial logging
    logging.basicConfig(
        format="%(message)s",
        level=logging.INFO if opt.local_rank in [-1, 0] else logging.WARN)

    # Train
    logger.info(opt)
    if opt.local_rank in [-1, 0]:
        logger.info('Start Tensorboard with "tensorboard --logdir=runs", view at http://localhost:6006/')
        writer = SummaryWriter()

    # Hyperparameters
    with open(opt.hyp) as f_hyp:
        hyp = yaml.safe_load(f_hyp)
    # data dict
    with open(opt.data) as f_data:
        data = yaml.safe_load(f_data)

    # Distributed training initialize
    device = select_device(opt.device)
    if opt.local_rank != -1:
        dist.init_process_group(init_method="env://", backend='nccl')
        torch.cuda.set_device(opt.local_rank)
        device = torch.device(f"cuda:{opt.local_rank}")
        # world_size = torch.distributed.get_world_size()

    init_seeds()
    cuda = device.type != 'cpu'
    torch.backends.cudnn.benchmark = True

    if multi_scale:
        img_size_min = round(img_size / 32 / 1.5) + 1
        img_size_max = round(img_size / 32 * 1.5) - 1
        img_size = img_size_max * 32  # initiate with maximum multi_scale size
        logger.info(f'Using multi-scale  {img_size_min * 32} - {img_size}')

    train_path = data['train']
    num_classes = int(data['num_classes'])  # number of classes

    # Load dataset
    dataset = LoadImagesAndLabels(train_path,
                                  img_size,
                                  batch_size,
                                  augment=True,
                                  hyp=hyp,
                                  rect=opt.rect)
    train_sampler = torch.utils.data.distributed.DistributedSampler(dataset) if opt.local_rank != -1 else None
    # device_count() is 0 on a CPU-only host and cpu_count() may return None;
    # fall back to 1 in both cases instead of raising.
    num_worker = (os.cpu_count() or 1) // max(torch.cuda.device_count(), 1)
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=batch_size,
                                             num_workers=min([num_worker, batch_size, 8]),
                                             shuffle=not (opt.rect or train_sampler),
                                             sampler=train_sampler,
                                             pin_memory=True,
                                             collate_fn=dataset.collate_fn)

    # Load model
    model = Model(cfg, img_size, arc=arc).to(device)

    # Load teacher model
    if teacher_cfg:
        teacher_model = Model(teacher_cfg, img_size, arc).to(device)

    # optimizer parameter groups
    param_group0, param_group1 = [], []
    for key, value in model.named_parameters():
        if 'Conv2d.weight' in key:
            param_group1.append(value)
        else:
            param_group0.append(value)
    if opt.adam:
        optimizer = optim.Adam(param_group0, lr=hyp['lr0'])
    else:
        optimizer = optim.SGD(param_group0, lr=hyp['lr0'], momentum=hyp['momentum'], nesterov=True)
    # add param_group1 with weight_decay
    optimizer.add_param_group({'params': param_group1, 'weight_decay': hyp['weight_decay']})
    logger.info(f'Optimizer groups: {len(param_group1)} conv.weight, {len(param_group0)} other')
    del param_group0, param_group1

    start_epoch = 0
    best_fitness = 0.
    if weights.endswith('.pt'):
        checkpoint = torch.load(weights, map_location=device)
        state_dict = intersect_dicts(checkpoint['model'], model.state_dict())
        model.load_state_dict(state_dict, strict=False)
        print('loaded weights from', weights, '\n')

        # load optimizer
        if checkpoint['optimizer'] is not None:
            optimizer.load_state_dict(checkpoint['optimizer'])
            best_fitness = checkpoint['best_fitness']
        # load results
        if checkpoint.get('training_results') is not None:
            with open(results_file, 'w') as file:
                file.write(checkpoint['training_results'])
        # resume
        if opt.resume:
            start_epoch = checkpoint['epoch'] + 1
        del checkpoint

    elif len(weights) > 0:
        # weights are 'yolov4.weights', 'darknet53.conv.74' etc.
        load_darknet_weights(model, weights)
        logger.info(f'loaded weights from {weights}\n')

    # Load teacher weights
    if teacher_cfg:
        if teacher_weights.endswith('.pt'):
            teacher_model.load_state_dict(torch.load(teacher_weights, map_location=device)['model'])
        elif teacher_weights.endswith('.weights'):
            load_darknet_weights(teacher_model, teacher_weights)
        else:
            raise Exception('pls provide proper teacher weights for knowledge distillation')
        if not mixed_precision:
            teacher_model.eval()
        logger.info('<......................using knowledge distillation....................>')
        logger.info(f'teacher model: {teacher_weights}\n')

    # Sparsity training
    # Default to "no layers" so the later uses of prune_index do not fail
    # with an unbound name when opt.prune != 0.
    prune_index = []
    if opt.prune == 0:
        _, _, prune_index = parse_module_index(model.module_dicts)
        if sparsity_training:
            logger.info('normal sparse training')

    if mixed_precision:
        if teacher_cfg:
            [model, teacher_model], optimizer = amp.initialize([model, teacher_model], optimizer,
                                                               opt_level='O1', verbosity=1)
        else:
            model, optimizer = amp.initialize(model, optimizer, opt_level='O1', verbosity=1)

    # SyncBatchNorm and distributed training
    if cuda and opt.local_rank != -1:
        model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model).to(device)
        model = model.to(device)
        model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[opt.local_rank])
        # re-expose the attributes that the code below reads from the model
        model.module_list = model.module.module_list
        model.yolo_layers = model.module.yolo_layers

    for index in prune_index:
        bn_weights = gather_bn_weights(model.module_list, [index])
        if opt.local_rank == 0:
            writer.add_histogram('before_train_per_layer_bn_weights/hist', bn_weights.numpy(), index, bins='doane')

    # Start training
    model.num_classes = num_classes
    model.arc = opt.arc
    model.hyp = hyp
    num_batch_size = len(dataloader)
    # 'P', 'R', 'mAP', 'F1', 'val GIoU', 'val Objectness', 'val Classification'
    results = (0, 0, 0, 0, 0, 0, 0)
    start_train_time = time.time()
    logger.info('Image sizes %d \n Starting training for %d epochs...', img_size, epochs)

    for epoch in range(start_epoch, epochs):  # epoch ------------------------------------------------------------------
        model.train()

        mean_losses = torch.zeros(4).to(device)
        mean_soft_target = torch.zeros(1).to(device)
        pbar = enumerate(dataloader)
        logger.info(('\n %10s %10s %10s %10s %10s %10s %10s %10s'), 'Epoch', 'gpu_mem', 'box', 'obj', 'cls', 'total',
                    'targets', 'img_size')
        if opt.local_rank in [-1, 0]:
            pbar = tqdm(pbar, total=num_batch_size)
        optimizer.zero_grad()

        for i, (imgs, targets, _, _) in pbar:  # batch -------------------------------------------------------------
            num_integrated_batches = i + num_batch_size * epoch

            # Adjust the learning rate
            learning_rate = adjust_learning_rate(optimizer, num_integrated_batches, num_batch_size, hyp, epoch, epochs)
            if i == 0 and opt.local_rank in [-1, 0]:
                logger.info(f'learning rate: {learning_rate}')
            imgs = imgs.to(device) / 255.0
            targets = targets.to(device)

            # Multi-Scale training
            if multi_scale:
                # pick a new random size every 10 accumulated steps
                if num_integrated_batches / accumulate % 10 == 0:
                    img_size = random.randrange(img_size_min, img_size_max + 1) * 32
                scale_factor = img_size / max(imgs.shape[2:])
                if scale_factor != 1:
                    new_shape = [math.ceil(x * scale_factor / 32.) * 32 for x in imgs.shape[2:]]
                    imgs = F.interpolate(imgs, size=new_shape, mode='bilinear', align_corners=False)

            pred = model(imgs)

            # Compute loss
            loss, loss_items = compute_loss(pred, targets, model)

            # knowledge distillation
            soft_target = 0
            if teacher_cfg:
                if mixed_precision:
                    with torch.no_grad():
                        output_teacher = teacher_model(imgs)
                else:
                    _, output_teacher = teacher_model(imgs)
                soft_target = distillation_loss(pred, output_teacher, model.num_classes, imgs.size(0))
                loss += soft_target

            # Scale loss by nominal batch_size of 64
            loss *= batch_size / 64

            # Compute gradient
            if mixed_precision:
                with amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
            else:
                loss.backward()

            # Sparse the BN layer that needs pruning
            if sparsity_training:
                # bn_l1_regularization(model.module_list, opt.penalty_factor, cba_index, epoch, epochs)
                bn_l1_regularization(model.module_list, opt.penalty_factor, prune_index, epoch, epochs)

            # Accumulate gradient for x batches before optimizing
            if num_integrated_batches % accumulate == 0:
                optimizer.step()
                optimizer.zero_grad()

            if opt.local_rank in [-1, 0]:
                # running means over the batches seen so far in this epoch
                mean_losses = (mean_losses * i + loss_items) / (i + 1)
                mean_soft_target = (mean_soft_target * i + soft_target) / (i + 1)
                memory = torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0  # (GB)
                description = ('%10s' * 2 + '%10.3g' * 6) % (
                    '%g/%g' % (epoch, epochs - 1), '%.3gG' % memory, *mean_losses, mean_soft_target, img_size)
                pbar.set_description(description)

            # end batch ------------------------------------------------------------------------------------------------

        # Update scheduler
        # scheduler.step()

        if opt.local_rank in [-1, 0]:
            final_epoch = epoch + 1 == epochs
            # Calculate mAP
            if not (opt.notest or opt.nosave) or final_epoch:
                with torch.no_grad():
                    results, _ = test(cfg, data,
                                      batch_size=batch_size,
                                      img_size=opt.img_size,
                                      model=model,
                                      conf_thres=0.001 if final_epoch and epoch > 0 else 0.1,  # 0.1 for speed
                                      save_json=final_epoch and epoch > 0)

            # Write epoch results
            with open(results_file, 'a') as file:
                # P, R, mAP, F1, test_losses=(GIoU, obj, cls)
                file.write(description + '%10.3g' * 7 % results + '\n')

            # Write Tensorboard results
            if writer:
                outputs = list(mean_losses) + list(results)
                titles = ['GIoU', 'Objectness', 'Classification', 'Train loss',
                          'Precision', 'Recall', 'mAP', 'F1', 'val GIoU', 'val Objectness', 'val Classification']
                for output, title in zip(outputs, titles):
                    writer.add_scalar(title, output, epoch)
                # only log the BN histogram when there are layers selected
                if prune_index:
                    bn_weights = gather_bn_weights(model.module_list, prune_index)
                    writer.add_histogram('bn_weights/hist', bn_weights.numpy(), epoch, bins='doane')

            # Update best mAP
            fitness = results[2]
            if fitness > best_fitness:
                best_fitness = fitness

            # Save training results
            save = (not opt.nosave) or (final_epoch and not opt.evolve)
            # This code already runs on rank -1 or 0 only. The former extra
            # ``local_rank == 0`` test meant single-process runs (rank -1)
            # never wrote a checkpoint.
            if save:
                with open(results_file, 'r') as file:
                    # Create checkpoint
                    checkpoint = {'epoch': epoch,
                                  'best_fitness': best_fitness,
                                  'training_results': file.read(),
                                  'model': model.module.state_dict() if isinstance(
                                   model, nn.parallel.DistributedDataParallel) else model.state_dict(),
                                  'optimizer': None if final_epoch else optimizer.state_dict()}

                # Save last checkpoint
                torch.save(checkpoint, last)

                # Save best checkpoint
                if best_fitness == fitness:
                    torch.save(checkpoint, best)

                # Delete checkpoint
                del checkpoint

            # end epoch -----------------------------------------------------------------------------------------------
    # end training

    if opt.local_rank in [-1, 0]:
        if len(opt.name):
            os.rename('results.txt', 'results_%s.txt' % opt.name)
        plot_results()  # save as results.png
        print(f'{epoch - start_epoch + 1} epochs completed in {(time.time() - start_train_time) / 3600:.3f} hours.\n')
    # Tear the process group down exactly when it was initialised above,
    # independent of how many GPUs the host happens to have.
    if opt.local_rank != -1:
        dist.destroy_process_group()
    torch.cuda.empty_cache()
    return results