Exemplo n.º 1
0
def main(args=None):
    """Evaluate a pickled RetinaNet model on a CSV-annotated dataset.

    Parses the command line, builds the validation dataset, loads the model
    object stored at ``--model_path`` and prints the value returned by
    ``csv_eval.evaluate`` for the requested IoU threshold.

    Args:
        args: Optional list of command-line tokens. ``None`` lets argparse
            read ``sys.argv``.
    """
    parser = argparse.ArgumentParser(
        description='Simple evaluation script for a RetinaNet network.')

    parser.add_argument('--csv_annotations_path',
                        help='Path to CSV annotations')
    parser.add_argument('--model_path', help='Path to model', type=str)
    # NOTE: --images_path is accepted but not read anywhere in this function.
    parser.add_argument('--images_path',
                        help='Path to images directory',
                        type=str)
    parser.add_argument('--class_list_path',
                        help='Path to classlist csv',
                        type=str)
    # Parsed as a float so malformed values are rejected by argparse itself.
    parser.add_argument('--iou_threshold',
                        help='IOU threshold used for evaluation',
                        type=float,
                        default=0.5)
    parser = parser.parse_args(args)

    dataset_val = CSVDataset(parser.csv_annotations_path,
                             parser.class_list_path,
                             transform=transforms.Compose(
                                 [Normalizer(), Resizer()]))

    use_cuda = torch.cuda.is_available()

    # The checkpoint holds a whole model object (not a state dict), so it is
    # loaded exactly once. On CPU-only hosts the tensors are remapped to the
    # CPU; without map_location a GPU-saved checkpoint cannot be restored.
    # NOTE(review): torch.load unpickles arbitrary objects - only point this
    # at checkpoints from a trusted source.
    retinanet = torch.load(
        parser.model_path,
        map_location=None if use_cuda else torch.device('cpu'))

    if use_cuda:
        retinanet = retinanet.cuda()

    # Always wrap: freeze_bn() below is reached through `.module`.
    retinanet = torch.nn.DataParallel(retinanet)

    retinanet.eval()
    retinanet.module.freeze_bn()

    print(
        csv_eval.evaluate(dataset_val,
                          retinanet,
                          iou_threshold=float(parser.iou_threshold)))
Exemplo n.º 2
0
def main(args=None):
    """Evaluate a RetinaNet checkpoint on a CSV-annotated dataset.

    Builds a ResNet-50 RetinaNet using the anchor ``ratios``/``scales`` read
    from ``config2.yaml``, restores the weights from ``--model_path`` via
    ``load_ckpt`` and prints the value returned by ``csv_eval.evaluate``
    (score threshold 0.4, IoU threshold from the command line).

    Args:
        args: Optional list of command-line tokens. ``None`` lets argparse
            read ``sys.argv``.
    """
    parser = argparse.ArgumentParser(
        description='Simple evaluation script for a RetinaNet network.')

    parser.add_argument('--csv_annotations_path',
                        help='Path to CSV annotations')
    parser.add_argument('--model_path', help='Path to model', type=str)
    # NOTE: --images_path is accepted but not read anywhere in this function.
    parser.add_argument('--images_path',
                        help='Path to images directory',
                        type=str)
    parser.add_argument('--class_list_path',
                        help='Path to classlist csv',
                        type=str)
    # Parsed as a float so malformed values are rejected by argparse itself.
    parser.add_argument('--iou_threshold',
                        help='IOU threshold used for evaluation',
                        type=float,
                        default=0.5)
    parser = parser.parse_args(args)

    dataset_val = CSVDataset(parser.csv_annotations_path,
                             parser.class_list_path,
                             transform=transforms.Compose(
                                 [Normalizer(), Resizer()]))

    # Create the model. The defaults below are handed to load_config together
    # with the YAML file name; its result supplies the anchor settings.
    config = load_config("config2.yaml", {"scales": None, "ratios": None})
    retinanet = model.resnet50(num_classes=dataset_val.num_classes(),
                               pretrained=False,
                               ratios=config["ratios"],
                               scales=config["scales"])

    # load_ckpt returns a 3-tuple; only the restored network is needed here.
    retinanet, _, _ = load_ckpt(parser.model_path, retinanet)

    use_gpu = True

    if use_gpu and torch.cuda.is_available():
        print("Using GPU for validation process")
        retinanet = retinanet.cuda()

    # Wrap unconditionally. Previously a host without CUDA left the model
    # unwrapped and the `.module` access below raised AttributeError.
    retinanet = torch.nn.DataParallel(retinanet)

    retinanet.eval()
    retinanet.module.freeze_bn()

    print(
        csv_eval.evaluate(dataset_val,
                          retinanet,
                          score_threshold=0.4,
                          iou_threshold=float(parser.iou_threshold)))
Exemplo n.º 3
0
def main(args=None):
    """Evaluate a saved RetinaNet model on a CSV dataset and print its mAP.

    The image resize limits (``minside``/``maxside``) are read from the
    ``[TRAINING]`` section of the INI file given by ``--configfile``. The
    model is restored with ``load_model(model_path, configfile)``.

    Args:
        args: Optional list of command-line tokens. ``None`` lets argparse
            read ``sys.argv``.

    Raises:
        ValueError: If ``--configfile`` is not supplied.
        SystemExit: With status 1 if the config file lacks valid
            ``minside``/``maxside`` entries.
    """
    parser = argparse.ArgumentParser(
        description='Simple evaluation script for a RetinaNet network.')

    parser.add_argument('--csv',
                        help='Path to dataset file you would like to evaluate')
    parser.add_argument('--csv_classes',
                        help='Path to file containing class list (see readme)')
    parser.add_argument('--model_path', help='Path to the model file.')
    parser.add_argument('--configfile', help='Path to the config file.')

    parser = parser.parse_args(args)

    # ConfigParser.read(None) fails with an opaque TypeError; report the
    # missing option explicitly instead.
    if parser.configfile is None:
        raise ValueError('Must provide --configfile,')

    configs = configparser.ConfigParser()
    configs.read(parser.configfile)

    try:
        maxside = int(configs['TRAINING']['maxside'])
        minside = int(configs['TRAINING']['minside'])
    except (KeyError, ValueError) as e:
        # KeyError: section/option missing; ValueError: not an integer.
        print(e)
        print(
            'CONFIG FILE IS INVALID. PLEASE REFER TO THE EXAMPLE CONFIG FILE AT config.txt'
        )
        # Non-zero status so calling scripts can detect the failure.
        sys.exit(1)

    if parser.csv is None:
        # Without annotations there is nothing to evaluate, so stop here
        # rather than handing a None dataset to the evaluator.
        print('No validation annotations provided.')
        return

    dataset_eval = CSVDataset(train_file=parser.csv,
                              class_list=parser.csv_classes,
                              transform=transforms.Compose([
                                  Normalizer(),
                                  Resizer(min_side=minside, max_side=maxside)
                              ]))
    retinanet = load_model(parser.model_path, parser.configfile)

    mAP = csv_eval.evaluate(dataset_eval, retinanet)
    print('-----------------')
    print(mAP)
    print('-----------------')
def main():
    """Train a ResNet-18 RetinaNet and report the best validation mAP.

    Datasets and batch size come from ``_make_dataset()``. The number of
    epochs and the mixup switch come from the module-level ``cfg`` object
    (``cfg.EPOCHS``, ``cfg.MIXUP``). After every epoch the model is evaluated
    with ``csv_eval.evaluate``; the best mAP, its epoch and the per-class
    average precisions of that epoch are printed once training finishes.
    """
    batch_size, dataset_train, dataset_val = _make_dataset()
    sampler = AspectRatioBasedSampler(dataset_train,
                                      batch_size=batch_size,
                                      drop_last=False)
    dataloader_train = DataLoader(dataset_train,
                                  num_workers=3,
                                  collate_fn=collater,
                                  batch_sampler=sampler)

    retinanet = model.resnet18(num_classes=dataset_train.num_classes(),
                               pretrained=True)

    use_cuda = torch.cuda.is_available()
    if use_cuda:
        retinanet = retinanet.cuda()
    retinanet = torch.nn.DataParallel(retinanet)

    optimizer = optim.Adam(retinanet.parameters(), lr=1e-5)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     patience=3,
                                                     verbose=True)
    # Sliding window over the most recent losses for the "running loss".
    loss_hist = collections.deque(maxlen=500)

    retinanet.train()

    print('Num training images: {}'.format(len(dataset_train)))

    best_map = 0
    best_map_epoch = 0
    # Stays None until some epoch beats the initial best mAP of 0.
    best_average_precisions = None

    for epoch_num in range(cfg.EPOCHS):

        retinanet.train()
        epoch_loss = []

        for iter_num, data in enumerate(dataloader_train):
            optimizer.zero_grad()

            if cfg.MIXUP:
                data, lam = mixup(data)

            images = data['img'].cuda() if use_cuda else data['img']
            classification_loss, regression_loss = retinanet(
                [images.float(), data['annot']])

            if cfg.MIXUP:
                classification_loss, regression_loss = mix_loss(
                    classification_loss, regression_loss, lam)

            classification_loss = classification_loss.mean()
            regression_loss = regression_loss.mean()
            loss = classification_loss + regression_loss

            # A loss of exactly zero yields no useful gradient; skip the step.
            if bool(loss == 0):
                continue

            loss.backward()
            torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)
            optimizer.step()

            loss_hist.append(float(loss))
            epoch_loss.append(float(loss))

            print(
                'Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'
                .format(epoch_num, iter_num, float(classification_loss),
                        float(regression_loss), np.mean(loss_hist)))

            del classification_loss
            del regression_loss

        # Validation part.
        print('Evaluating dataset')
        average_precisions, mAP = csv_eval.evaluate(dataset_val, retinanet)
        if mAP > best_map:
            best_average_precisions = average_precisions
            best_map = mAP
            best_map_epoch = epoch_num

        # Only step the scheduler when at least one batch contributed a loss;
        # the mean of an empty list is NaN.
        if epoch_loss:
            scheduler.step(np.mean(epoch_loss))

    retinanet.eval()

    # The value printed here is the epoch index at which the best mAP was
    # reached (the previous label called it the mAP itself).
    print('\nBest mAP epoch:', best_map_epoch)
    if best_average_precisions is None:
        # Previously this path raised NameError on the unbound variable.
        print('No epoch produced a mAP above 0; no per-class results.')
    else:
        for label in range(dataset_val.num_classes()):
            label_name = dataset_val.label_to_name(label)
            print('{}: {}'.format(label_name,
                                  best_average_precisions[label][0]))
    print('BEST MAP: ', best_map)
Exemplo n.º 5
0
def main(args=None):
    """Train a RetinaNet model from CSV annotations, driven by an INI config.

    Workflow:
      1. Parse the command line and the ``[TRAINING]`` / ``[MODEL]`` sections
         of the config file.
      2. Build the training (and optional validation) datasets and loaders.
      3. Build a ResNet-backed RetinaNet of the configured depth, optionally
         restoring weights from ``--model``.
      4. Train for up to ``maxepochs`` epochs, saving a checkpoint after each
         epoch and applying early stopping on the validation loss.
      5. Save the final model, the loss histories, and an augmented copy of
         the config (class count, input shape, label map) in the save
         directory.

    Args:
        args: Optional list of command-line tokens. ``None`` lets argparse
            read ``sys.argv``.

    Raises:
        ValueError: If ``--csv_train`` or ``--csv_classes`` is missing, or
            the configured depth is unsupported.
        SystemExit: With status 1 if the ``[TRAINING]`` section is invalid.
    """
    parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')

    parser.add_argument(
        '--csv_train',
        help='Path to file containing training annotations (see readme)')
    parser.add_argument('--csv_classes',
                        help='Path to file containing class list (see readme)')
    parser.add_argument(
        '--csv_val',
        help=
        'Path to file containing validation annotations (optional, see readme)'
    )
    parser.add_argument('--configfile',
                        help='Path to the config file',
                        default='config.txt',
                        type=str)
    parser.add_argument(
        '--model',
        help=
        'Path to the pretrained model file state dict where training must start from, '
        'if you want to use a pretrained retinanet.',
        default=None,
        type=str)
    parser = parser.parse_args(args)

    configs = configparser.ConfigParser()
    configs.read(parser.configfile)

    try:
        training_cfg = configs['TRAINING']
        batchsize = int(training_cfg['batchsize'])
        depth = int(training_cfg['depth'])
        maxepochs = int(training_cfg['maxepochs'])
        maxside = int(training_cfg['maxside'])
        minside = int(training_cfg['minside'])
        savepath = training_cfg['savepath']
        lr_start = float(training_cfg['lr_start'])
        lr_reduce_on_plateau_factor = float(
            training_cfg['lr_reduce_on_plateau_factor'])
        lr_reduce_on_plateau_patience = int(
            training_cfg['lr_reduce_on_plateau_patience'])
        earlystopping_patience = int(training_cfg['earlystopping_patience'])
    except (KeyError, ValueError) as e:
        # KeyError: section/option missing; ValueError: malformed number.
        print(e)
        print(
            'CONFIG FILE IS INVALID. PLEASE REFER TO THE EXAMPLE CONFIG FILE AT config.txt'
        )
        # Non-zero status so calling scripts can detect the failure.
        sys.exit(1)

    # The anchor settings are optional: fall back to the model defaults when
    # the [MODEL] section is absent or its values are not valid JSON.
    try:
        ratios = json.loads(configs['MODEL']['ratios'])
        scales = json.loads(configs['MODEL']['scales'])
    except (KeyError, ValueError) as e:
        print(e)
        print('USING DEFAULT RATIOS AND SCALES')
        ratios = None
        scales = None

    # Validate required inputs before creating anything on disk.
    if parser.csv_train is None:
        raise ValueError('Must provide --csv_train,')

    if parser.csv_classes is None:
        raise ValueError('Must provide --csv_classes,')

    model_save_dir = datetime.now().strftime(
        "%d_%b_%Y_%H_%M") if savepath == 'datetime' else savepath
    os.makedirs(model_save_dir, exist_ok=True)

    # Copy the config file into the model save directory
    saved_config_path = os.path.join(model_save_dir, 'config.txt')
    shutil.copy(parser.configfile, saved_config_path)

    # Create the data loaders
    dataset_train = CSVDataset(train_file=parser.csv_train,
                               class_list=parser.csv_classes,
                               transform=transforms.Compose([
                                   Normalizer(),
                                   Augmenter(),
                                   Resizer(min_side=minside, max_side=maxside)
                               ]))

    if parser.csv_val is None:
        dataset_val = None
        print('No validation annotations provided.')
    else:
        dataset_val = CSVDataset(train_file=parser.csv_val,
                                 class_list=parser.csv_classes,
                                 transform=transforms.Compose([
                                     Normalizer(),
                                     Resizer(min_side=minside,
                                             max_side=maxside)
                                 ]))

    sampler = AspectRatioBasedSampler(dataset_train,
                                      batch_size=batchsize,
                                      drop_last=False)
    dataloader_train = DataLoader(dataset_train,
                                  num_workers=3,
                                  collate_fn=collater,
                                  batch_sampler=sampler)
    dataloader_val = None

    if dataset_val is not None:
        sampler_val = AspectRatioBasedSampler(dataset_val,
                                              batch_size=1,
                                              drop_last=False)
        dataloader_val = DataLoader(dataset_val,
                                    num_workers=3,
                                    collate_fn=collater,
                                    batch_sampler=sampler_val)

    # Create the model
    if depth not in (18, 34, 50, 101, 152):
        raise ValueError(
            'Unsupported model depth, must be one of 18, 34, 50, 101, 152')
    # model.resnet18 / resnet34 / ... share one signature; pick by depth.
    build_retinanet = getattr(model, f'resnet{depth}')
    retinanet = build_retinanet(num_classes=dataset_train.num_classes(),
                                pretrained=True,
                                ratios=ratios,
                                scales=scales,
                                no_nms=False)

    use_cuda = torch.cuda.is_available()
    if use_cuda:
        retinanet = retinanet.cuda()
    # Wrapped in both cases because freeze_bn() is reached via `.module`.
    retinanet = torch.nn.DataParallel(retinanet)

    optimizer = optim.Adam(retinanet.parameters(), lr=lr_start)

    scheduler = optim.lr_scheduler.ReduceLROnPlateau(
        optimizer,
        patience=lr_reduce_on_plateau_patience,
        verbose=True,
        factor=lr_reduce_on_plateau_factor,
        cooldown=1,
        min_lr=1e-10)

    # Sliding window over the most recent losses for the "running loss".
    loss_hist = collections.deque(maxlen=500)

    if parser.model:
        print(
            f'TRYING TO LOAD PRETRAINED MODEL AVAILABLE AT: {parser.model}. MAKE SURE THE MODEL CONFIGS MATCH!!!!!'
        )
        # The state dict is loaded into the DataParallel wrapper. On CPU-only
        # hosts the stored tensors are remapped to the CPU.
        map_location = None if use_cuda else torch.device('cpu')
        retinanet.load_state_dict(
            torch.load(parser.model, map_location=map_location))
        print(f'LOADED PRETRAINED MODEL : {parser.model}')

    retinanet.train()
    retinanet.module.freeze_bn()
    earlystopping = EarlyStopping(patience=earlystopping_patience,
                                  verbose=True,
                                  delta=1e-10,
                                  path=os.path.join(model_save_dir,
                                                    'best_model.pt'))
    print('Num training images: {}'.format(len(dataset_train)))

    # epoch number -> mean loss of that epoch
    loss_dict = OrderedDict()
    val_loss_dict = OrderedDict()

    for epoch_num in range(maxepochs):

        retinanet.train()
        retinanet.module.freeze_bn()

        epoch_loss = []
        epoch_val_loss = []

        for iter_num, data in enumerate(dataloader_train):
            # Best-effort training: a failing batch is reported and skipped.
            try:
                optimizer.zero_grad()

                images = data['img'].cuda() if use_cuda else data['img']
                classification_loss, regression_loss = retinanet(
                    [images.float(), data['annot']])

                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()

                loss = classification_loss + regression_loss

                # A loss of exactly zero yields no useful gradient.
                if bool(loss == 0):
                    continue

                loss.backward()

                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)

                optimizer.step()

                loss_hist.append(float(loss))
                epoch_loss.append(float(loss))

                print(
                    'Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'
                    .format(epoch_num, iter_num, float(classification_loss),
                            float(regression_loss), np.mean(loss_hist)),
                    end='\r',
                    flush=True)

                del classification_loss
                del regression_loss
            except Exception as e:
                print(e)
                continue

        if epoch_loss:
            loss_dict[epoch_num] = np.mean(epoch_loss)

        print('')

        if dataloader_val is not None:
            print('Evaluating dataset')
            # The network is still in train mode here (eval() is only called
            # further down), and gradients are disabled for these passes.
            for iter_num, data in enumerate(dataloader_val):
                try:
                    with torch.no_grad():
                        images = (data['img'].cuda()
                                  if use_cuda else data['img'])
                        val_classification_loss, val_regression_loss = retinanet(
                            [images.float(), data['annot']])

                        val_classification_loss = val_classification_loss.mean(
                        )
                        # Fixed: this used to take the mean of the
                        # classification loss, so the reported validation
                        # loss was twice the classification term.
                        val_regression_loss = val_regression_loss.mean()

                        val_loss = val_classification_loss + val_regression_loss
                        print('Validation Loss: {:1.5f}'.format(val_loss),
                              end='\r',
                              flush=True)
                        epoch_val_loss.append(float(val_loss))

                except Exception as e:
                    print(e)
                    continue
            print('')
            if epoch_val_loss:
                val_loss_dict[epoch_num] = np.mean(epoch_val_loss)

            retinanet.eval()
            mAP = csv_eval.evaluate(dataset_val, retinanet)
            print('-----------------')
            print(mAP)
            print('-----------------')

        # Skip the scheduler when every batch failed or was skipped; the mean
        # of an empty list is NaN.
        if epoch_loss:
            scheduler.step(np.mean(epoch_loss))

        model_save_path = os.path.join(model_save_dir,
                                       f'retinanet_{epoch_num}.pt')
        save_model(retinanet, model_save_path)
        print(f'Saved model of epoch {epoch_num} to {model_save_path}')

        # Early stopping needs a validation loss for this epoch. Without a
        # validation set (or if every validation batch failed) there is none,
        # and the unconditional lookup used to raise KeyError.
        if epoch_num in val_loss_dict:
            earlystopping(val_loss_dict[epoch_num], retinanet)

            if earlystopping.early_stop:
                print("Early stopping")
                break

    retinanet.eval()
    save_model(retinanet, os.path.join(model_save_dir, 'model_final.pt'))

    with open(os.path.join(model_save_dir, 'loss_history.txt'), 'w') as f:
        for epoch_num, loss in loss_dict.items():
            f.write(f'{epoch_num}:{loss} \n')
    with open(os.path.join(model_save_dir, 'val_loss_history.txt'), 'w') as f:
        for epoch_num, loss in val_loss_dict.items():
            f.write(f'{epoch_num}:{loss} \n')

    # Write configs to model save directory
    configs = configparser.ConfigParser()
    configs.read(saved_config_path)
    configs['TRAINING']['num_classes'] = str(dataset_train.num_classes())

    # [MODEL] is optional in the input config (see the defaults fallback
    # above), so create it before writing into it.
    if not configs.has_section('MODEL'):
        configs.add_section('MODEL')

    # Record the per-sample shape (everything after the batch axis) of the
    # first training batch; nothing is written for an empty loader.
    first_batch = next(iter(dataloader_train), None)
    if first_batch is not None:
        configs['MODEL']['input_shape'] = str(
            list(first_batch['img'].shape[1:]))

    # Write class mapping to the model configs.
    with open(parser.csv_classes, 'r') as f:
        labels = load_classes_from_csv_reader(csv.reader(f, delimiter=','))

    configs['LABELMAP'] = {str(i): str(j) for i, j in labels.items()}
    with open(saved_config_path, 'w') as configfile:
        configs.write(configfile)
Exemplo n.º 6
0
    plt.show()

#%%
# Compare the various models and show the inference time as well as mAP
# NOTE(review): `wd` (path prefix) and `dataset_val` are defined outside this
# excerpt. The return value of csv_eval.evaluate is discarded here, so this
# cell itself prints only the timing figure.

# model_names = ['retinanet_res18.pt', 'retinanet_res50.pt', 'retinanet_res101.pt']
model_names = ['model_final5.pt', 'retinanet_res18.pt', 'model_final_12.pt']

# CUDA events used to time the evaluation of each model.
start = torch.cuda.Event(enable_timing=True)
end = torch.cuda.Event(enable_timing=True)

for mod in model_names:
    # Rebinds the name `model` to the loaded object on every iteration.
    model = torch.load(wd + mod)
    start.record()
    csv_eval.evaluate(dataset_val, model)
    end.record()

    # Waits for everything to finish running
    torch.cuda.synchronize()

    # Elapsed time between the two events divided by the dataset size, i.e.
    # an average per dataset item over the whole evaluate() call
    # (elapsed_time is in milliseconds per the PyTorch docs - TODO confirm).
    print(mod, start.elapsed_time(end) / len(dataset_val))

#%%
# Read in the satellite images for further test, compile a csv file formatted for testing

# Changes the process working directory; the relative glob below depends on it.
os.chdir('/home/ubuntu/project/sat_image/')
sat_images = [i for i in glob.iglob('*.tif')]

# Empty frame with six integer-labelled columns, to be filled per image.
sat_images_df = pd.DataFrame(columns=[1, 2, 3, 4, 5, 6])
for file in sat_images:
Exemplo n.º 7
0
def main(args=None):
    """Train a RetinaNet model on COCO or CSV data, optionally with DDP.

    Distributed training is enabled only when both ``--distributed`` is
    passed and ``config.DISTRIBUTED`` is truthy. Evaluation, scheduler
    stepping and checkpoint writing happen only in the process whose
    ``--local_rank`` is 0.

    NOTE(review): ``batch_size``, ``use_cuda``, ``RESTORE``,
    ``checkpoints_dir`` and ``config`` are not defined in this function and
    are presumably module-level settings - confirm.

    Args:
        args: Optional list of command-line tokens. ``None`` lets argparse
            read ``sys.argv``.

    Raises:
        ValueError: If the dataset type, a required path, or the model depth
            is missing or unsupported.
    """
    parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')

    parser.add_argument('--dataset',
                        help='Dataset type, must be one of csv or coco.')
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument(
        '--csv_train',
        help='Path to file containing training annotations (see readme)')
    parser.add_argument('--csv_classes',
                        help='Path to file containing class list (see readme)')
    parser.add_argument(
        '--csv_val',
        help=
        'Path to file containing validation annotations (optional, see readme)'
    )

    parser.add_argument(
        '--depth',
        help='Resnet depth, must be one of 18, 34, 50, 101, 152',
        type=int,
        default=50)
    parser.add_argument('--epochs',
                        help='Number of epochs',
                        type=int,
                        default=100)
    parser.add_argument('--local_rank', help='Local rank', type=int, default=0)
    parser.add_argument('--distributed', action='store_true')
    parser.add_argument('--pretrained', action='store_true')

    parser = parser.parse_args(args)

    torch.cuda.set_device(parser.local_rank)
    DISTRIBUTED = parser.distributed and config.DISTRIBUTED
    if DISTRIBUTED:
        distributed.init_process_group(backend="nccl")
    # NOTE: `device` is not used further down in this function.
    device = torch.device(f'cuda:{parser.local_rank}')

    # Create the data loaders
    if parser.dataset == 'coco':

        if parser.coco_path is None:
            raise ValueError('Must provide --coco_path when training on COCO,')

        dataset_train = CocoDataset(parser.coco_path,
                                    set_name='train2017',
                                    transform=transforms.Compose(
                                        [Normalizer(),
                                         Augmenter(),
                                         Resizer()]))
        dataset_val = CocoDataset(parser.coco_path,
                                  set_name='val2017',
                                  transform=transforms.Compose(
                                      [Normalizer(), Resizer()]))

    elif parser.dataset == 'csv':

        # Fixed: these messages used to say "COCO" in the CSV branch.
        if parser.csv_train is None:
            raise ValueError('Must provide --csv_train when training on CSV,')

        if parser.csv_classes is None:
            raise ValueError(
                'Must provide --csv_classes when training on CSV,')

        dataset_train = CSVDataset(train_file=parser.csv_train,
                                   class_list=parser.csv_classes,
                                   transform=transforms.Compose(
                                       [Normalizer(),
                                        Augmenter(),
                                        Resizer()]))

        if parser.csv_val is None:
            dataset_val = None
            print('No validation annotations provided.')
        else:
            dataset_val = CSVDataset(train_file=parser.csv_val,
                                     class_list=parser.csv_classes,
                                     transform=transforms.Compose(
                                         [Normalizer(),
                                          Resizer()]))

    else:
        raise ValueError(
            'Dataset type not understood (must be csv or coco), exiting.')

    # NOTE: dataloader_val is built below but not consumed in this function;
    # evaluation is done directly on dataset_val.
    if DISTRIBUTED:
        sampler = DistributedSampler(dataset_train)
        dataloader_train = DataLoader(dataset_train,
                                      num_workers=4,
                                      batch_size=batch_size,
                                      collate_fn=collater,
                                      sampler=sampler,
                                      pin_memory=True,
                                      drop_last=True)
        if dataset_val is not None:
            sampler_val = DistributedSampler(dataset_val)
            dataloader_val = DataLoader(dataset_val,
                                        batch_size=1,
                                        num_workers=4,
                                        collate_fn=collater,
                                        sampler=sampler_val,
                                        pin_memory=True,
                                        drop_last=True)
    else:
        sampler = AspectRatioBasedSampler(dataset_train,
                                          batch_size=batch_size,
                                          drop_last=False)
        dataloader_train = DataLoader(dataset_train,
                                      num_workers=4,
                                      collate_fn=collater,
                                      batch_sampler=sampler,
                                      pin_memory=True)
        if dataset_val is not None:
            sampler_val = AspectRatioBasedSampler(dataset_val,
                                                  batch_size=1,
                                                  drop_last=False)
            dataloader_val = DataLoader(dataset_val,
                                        num_workers=4,
                                        collate_fn=collater,
                                        batch_sampler=sampler_val,
                                        pin_memory=True)

    # Create the model
    if parser.depth not in (18, 34, 50, 101, 152):
        raise ValueError(
            'Unsupported model depth, must be one of 18, 34, 50, 101, 152')
    # model.retinanet18 / retinanet34 / ... share one signature.
    build_retinanet = getattr(model, f'retinanet{parser.depth}')
    retinanet = build_retinanet(num_classes=dataset_train.num_classes(),
                                pretrained=parser.pretrained)

    if use_cuda:
        retinanet = retinanet.cuda()

    if RESTORE:
        retinanet.load_state_dict(torch.load(RESTORE))

    if DISTRIBUTED:
        retinanet = torch.nn.parallel.DistributedDataParallel(
            retinanet, device_ids=[parser.local_rank])
        print("Let's use", parser.local_rank, "GPU!")

    retinanet.training = True

    optimizer = optim.Adam(retinanet.parameters(), lr=1e-5)

    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     patience=3,
                                                     verbose=True)

    # Sliding window over the most recent losses for the "running loss".
    loss_hist = collections.deque(maxlen=500)

    retinanet.train()
    # freeze_bn lives on the underlying network, which DDP exposes as .module.
    if DISTRIBUTED:
        retinanet.module.freeze_bn()
    else:
        retinanet.freeze_bn()

    print('Num training images: {}'.format(len(dataset_train)))

    for epoch_num in range(parser.epochs):
        save_to_disk = parser.local_rank == 0

        if DISTRIBUTED:
            # DistributedSampler derives its shuffle from the epoch number;
            # without this call every epoch sees the same ordering.
            sampler.set_epoch(epoch_num)

        retinanet.train()
        if DISTRIBUTED:
            retinanet.module.freeze_bn()
        else:
            retinanet.freeze_bn()

        epoch_loss = []

        for iter_num, data in enumerate(dataloader_train):
            # Best-effort training: a failing batch is reported and skipped.
            try:
                optimizer.zero_grad()

                if use_cuda:
                    classification_loss, regression_loss = retinanet(
                        [data['img'].cuda().float(), data['annot'].cuda()])
                else:
                    classification_loss, regression_loss = retinanet(
                        [data['img'].float(), data['annot']])

                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()

                loss = classification_loss + regression_loss

                # A loss of exactly zero yields no useful gradient.
                if bool(loss == 0):
                    continue

                loss.backward()

                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)

                optimizer.step()

                loss_hist.append(float(loss))

                epoch_loss.append(float(loss))
                if save_to_disk:
                    print(
                        'Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'
                        .format(epoch_num, iter_num,
                                float(classification_loss),
                                float(regression_loss), np.mean(loss_hist)))

                del classification_loss
                del regression_loss
            except Exception as e:
                print(e)
                continue
        if save_to_disk:
            if parser.dataset == 'coco':

                print('Evaluating dataset')

                coco_eval.evaluate_coco(dataset_val, retinanet)

            elif parser.dataset == 'csv' and parser.csv_val is not None:

                print('Evaluating dataset')

                csv_eval.evaluate(dataset_val, retinanet)

            # NOTE(review): the scheduler is stepped on rank 0 only, so in
            # distributed runs the other ranks keep their learning rate.
            # Skipped for an epoch without any recorded loss, because the
            # mean of an empty list is NaN.
            if epoch_loss:
                scheduler.step(np.mean(epoch_loss))

            checkpoint_path = '{}/{}_retinanet_{}.pt'.format(
                checkpoints_dir, parser.dataset, epoch_num)
            # Save the bare network's weights so the checkpoint can be loaded
            # without the DDP wrapper.
            if DISTRIBUTED:
                torch.save(retinanet.module.state_dict(), checkpoint_path)
            else:
                torch.save(retinanet.state_dict(), checkpoint_path)
                continue

            loss.backward()
            pt.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)
            optimizer.step()
            loss_hist.append(float(loss))
            epoch_loss.append(float(loss))

            print(
                'Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'
                .format(epoch_num + 1, iter_num, float(classification_loss),
                        float(regression_loss), np.mean(loss_hist)))

            del classification_loss
            del regression_loss
        except Exception as e:
            print(e)
            continue

    print('Evaluating dataset')
    mAP = csv_eval.evaluate(dataset_test, retinanet)
    #scheduler.step(np.mean(epoch_loss))

print(f'Mean average precision = {mAP}')
retinanet.eval()
pt.save(retinanet, 'model_final.pt')

#plot predictions
scores, classification, transformed_anchors = plot_predictions(
    test_df, class_df, 'filename', 3, retinanet, 0.1)
Exemplo n.º 9
0
def main(args=None):
    """Train a RetinaNet detector, with early stopping on CSV validation.

    Parses the command line, builds the COCO or CSV datasets, creates a
    ResNet-backed RetinaNet of the requested depth and trains it with Adam
    under a ReduceLROnPlateau schedule driven by the mean epoch loss.
    At the end of every epoch the model is evaluated (when a validation set
    exists) and ``retinanet.module`` is saved to
    ``<dataset>_retinanet_<epoch>.pt``. The wrapped model is finally saved
    to ``model_final.pt``.

    For CSV datasets with validation annotations, the value
    ``mAP.get(0)[0]`` of the evaluation result is tracked (presumably the
    average precision of label 0 -- confirm against ``csv_eval.evaluate``).
    Training stops once that value has been below the best value seen so
    far for ``PATIENCE`` consecutive epochs; the epoch that triggers the
    stop does not write a checkpoint.

    Args:
        args: Optional list of command-line tokens. ``None`` lets argparse
            read ``sys.argv``.

    Raises:
        ValueError: If the dataset type or the ResNet depth is unsupported,
            or if a path required by the chosen dataset type is missing.
    """
    arg_parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')

    arg_parser.add_argument('--dataset',
                            help='Dataset type, must be one of csv or coco.')
    arg_parser.add_argument('--coco_path', help='Path to COCO directory')
    arg_parser.add_argument(
        '--csv_train',
        help='Path to file containing training annotations (see readme)')
    arg_parser.add_argument(
        '--csv_classes',
        help='Path to file containing class list (see readme)')
    arg_parser.add_argument(
        '--csv_val',
        help=
        'Path to file containing validation annotations (optional, see readme)'
    )

    arg_parser.add_argument(
        '--depth',
        help='Resnet depth, must be one of 18, 34, 50, 101, 152',
        type=int,
        default=50)
    arg_parser.add_argument('--epochs',
                            help='Number of epochs',
                            type=int,
                            default=100)

    opts = arg_parser.parse_args(args)

    # Create the datasets. Required paths are validated before any dataset
    # is constructed, so a bad command line fails without touching the disk.
    if opts.dataset == 'coco':

        if opts.coco_path is None:
            raise ValueError('Must provide --coco_path when training on COCO,')

        dataset_train = CocoDataset(
            opts.coco_path,
            set_name='train2017',
            transform=transforms.Compose(
                [Normalizer(), Augmenter(), Resizer()]))
        dataset_val = CocoDataset(
            opts.coco_path,
            set_name='val2017',
            transform=transforms.Compose([Normalizer(), Resizer()]))

    elif opts.dataset == 'csv':

        if opts.csv_train is None:
            raise ValueError('Must provide --csv_train when training on CSV,')

        if opts.csv_classes is None:
            raise ValueError(
                'Must provide --csv_classes when training on CSV,')

        dataset_train = CSVDataset(
            train_file=opts.csv_train,
            class_list=opts.csv_classes,
            transform=transforms.Compose(
                [Normalizer(), Augmenter(), Resizer()]))

        if opts.csv_val is None:
            dataset_val = None
            print('No validation annotations provided.')
        else:
            dataset_val = CSVDataset(
                train_file=opts.csv_val,
                class_list=opts.csv_classes,
                transform=transforms.Compose([Normalizer(), Resizer()]))

    else:
        raise ValueError(
            'Dataset type not understood (must be csv or coco), exiting.')

    sampler = AspectRatioBasedSampler(dataset_train,
                                      batch_size=2,
                                      drop_last=False)
    dataloader_train = DataLoader(dataset_train,
                                  num_workers=3,
                                  collate_fn=collater,
                                  batch_sampler=sampler)

    # NOTE: the validation loader is built but not consumed below; the
    # evaluation helpers receive ``dataset_val`` directly.
    if dataset_val is not None:
        sampler_val = AspectRatioBasedSampler(dataset_val,
                                              batch_size=1,
                                              drop_last=False)
        dataloader_val = DataLoader(dataset_val,
                                    num_workers=3,
                                    collate_fn=collater,
                                    batch_sampler=sampler_val)

    # Create the model: look up model.resnet<depth> after validating depth.
    if opts.depth not in (18, 34, 50, 101, 152):
        raise ValueError(
            'Unsupported model depth, must be one of 18, 34, 50, 101, 152')
    build_backbone = getattr(model, 'resnet{}'.format(opts.depth))
    retinanet = build_backbone(num_classes=dataset_train.num_classes(),
                               pretrained=True)

    cuda_available = torch.cuda.is_available()
    if cuda_available:
        retinanet = torch.nn.DataParallel(retinanet.cuda()).cuda()
    else:
        retinanet = torch.nn.DataParallel(retinanet)

    optimizer = optim.Adam(retinanet.parameters(), lr=1e-5)

    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     patience=3,
                                                     verbose=True)

    # Sliding window of the most recent losses, used for the running mean.
    loss_hist = collections.deque(maxlen=500)

    retinanet.train()
    retinanet.module.freeze_bn()

    print('Num training images: {}'.format(len(dataset_train)))

    # Early-stopping state: best validation value so far and the number of
    # consecutive epochs that fell below it.
    best_map = -999999
    in_a_row = 0

    for epoch_num in range(opts.epochs):

        # Evaluation may leave the model in eval mode, so re-arm every epoch.
        retinanet.train()
        retinanet.module.freeze_bn()

        epoch_loss = []

        for iter_num, data in enumerate(dataloader_train):
            # Best effort: a failing batch is reported and skipped so that a
            # single bad sample does not abort the whole run.
            try:
                optimizer.zero_grad()

                images = data['img']
                if cuda_available:
                    images = images.cuda()

                classification_loss, regression_loss = retinanet(
                    [images.float(), data['annot']])

                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()

                loss = classification_loss + regression_loss

                # Nothing to learn from this batch.
                if bool(loss == 0):
                    continue

                loss.backward()

                # Rescale gradients so their total norm is at most 0.1.
                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)

                optimizer.step()

                loss_hist.append(float(loss))

                epoch_loss.append(float(loss))

                print(
                    'Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'
                    .format(epoch_num, iter_num, float(classification_loss),
                            float(regression_loss), np.mean(loss_hist)))

                del classification_loss
                del regression_loss
            except Exception as e:
                print(e)
                continue

        if opts.dataset == 'coco':

            print('Evaluating dataset')

            coco_eval.evaluate_coco(dataset_val, retinanet)

        elif opts.dataset == 'csv' and opts.csv_val is not None:

            print('Evaluating dataset')

            mAP = csv_eval.evaluate(dataset_val, retinanet)
            map_val = float(mAP.get(0)[0])

            if map_val < best_map:
                in_a_row += 1
                if in_a_row >= PATIENCE:
                    print('Early Stop, Epoch', epoch_num)
                    break
                else:
                    print('Validation Performance Decreased for', in_a_row,
                          'Run(s)')
            else:
                best_map = map_val
                in_a_row = 0

        scheduler.step(np.mean(epoch_loss))

        torch.save(retinanet.module,
                   '{}_retinanet_{}.pt'.format(opts.dataset, epoch_num))

    retinanet.eval()

    torch.save(retinanet, 'model_final.pt')
Exemplo n.º 10
0
def main(args=None):
    """Train a RetinaNet detector and log per-iteration losses.

    Parses the command line, builds the COCO or CSV datasets, creates a
    ResNet-backed RetinaNet of the requested depth and trains it with Adam
    under a ReduceLROnPlateau schedule driven by the mean epoch loss.
    Classification and regression losses are written to a ``SummaryWriter``
    at every iteration. After each epoch the model is evaluated (when a
    validation set exists) and ``retinanet.module`` is saved to
    ``<dataset>_retinanet_<epoch>.pt``; the wrapped model is finally saved
    to ``model_final.pt``.

    ``--dcn_layers`` and ``--use_depth`` are forwarded to the model
    constructor only for ``--depth 50``; the other depths ignore them.

    Args:
        args: Optional list of command-line tokens. ``None`` lets argparse
            read ``sys.argv``.

    Raises:
        ValueError: If the dataset type or the ResNet depth is unsupported,
            if a path required by the chosen dataset type is missing, or if
            a ``--dcn_layers`` entry is not an integer.
        IndexError: If a ``--dcn_layers`` entry is outside the four
            available slots.
    """
    arg_parser = argparse.ArgumentParser(description='Simple training script for training a RetinaNet network.')

    arg_parser.add_argument('--dataset', help='Dataset type, must be one of csv or coco.')
    arg_parser.add_argument('--coco_path', help='Path to COCO directory')
    arg_parser.add_argument('--csv_train', help='Path to file containing training annotations (see readme)')
    arg_parser.add_argument('--csv_classes', help='Path to file containing class list (see readme)')
    arg_parser.add_argument('--csv_val', help='Path to file containing validation annotations (optional, see readme)')

    arg_parser.add_argument('--depth', help='Resnet depth, must be one of 18, 34, 50, 101, 152', type=int, default=50)
    arg_parser.add_argument('--epochs', help='Number of epochs', type=int, default=100)

    arg_parser.add_argument('--dcn_layers', type=str, help='comma seperated str where laters to be used, 0..3', default=None)
    arg_parser.add_argument('--use_depth', action='store_true', help='if specified, use depth for deformconv')
    opts = arg_parser.parse_args(args)

    # Four on/off flags, switched on by index from --dcn_layers.
    use_dcn = [False, False, False, False]
    if opts.dcn_layers is not None:
        for layer_index in opts.dcn_layers.split(','):
            use_dcn[int(layer_index)] = True

    # Create the datasets. Required paths are validated before any dataset
    # is constructed, so a bad command line fails without touching the disk.
    if opts.dataset == 'coco':

        if opts.coco_path is None:
            raise ValueError('Must provide --coco_path when training on COCO,')

        dataset_train = CocoDataset(opts.coco_path, set_name='train2017',
                                    transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()]))
        dataset_val = CocoDataset(opts.coco_path, set_name='val2017',
                                  transform=transforms.Compose([Normalizer(), Resizer()]))

    elif opts.dataset == 'csv':

        if opts.csv_train is None:
            raise ValueError('Must provide --csv_train when training on CSV,')

        if opts.csv_classes is None:
            raise ValueError('Must provide --csv_classes when training on CSV,')

        dataset_train = CSVDataset(train_file=opts.csv_train, class_list=opts.csv_classes,
                                   transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()]))

        if opts.csv_val is None:
            dataset_val = None
            print('No validation annotations provided.')
        else:
            dataset_val = CSVDataset(train_file=opts.csv_val, class_list=opts.csv_classes,
                                     transform=transforms.Compose([Normalizer(), Resizer()]))

    else:
        raise ValueError('Dataset type not understood (must be csv or coco), exiting.')

    sampler = AspectRatioBasedSampler(dataset_train, batch_size=128, drop_last=False)
    dataloader_train = DataLoader(dataset_train, num_workers=3, collate_fn=collater, batch_sampler=sampler)

    # NOTE: the validation loader is built but not consumed below; the
    # evaluation helpers receive ``dataset_val`` directly.
    if dataset_val is not None:
        sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False)
        dataloader_val = DataLoader(dataset_val, num_workers=3, collate_fn=collater, batch_sampler=sampler_val)

    # Create the model: look up model.resnet<depth> after validating depth.
    if opts.depth not in (18, 34, 50, 101, 152):
        raise ValueError('Unsupported model depth, must be one of 18, 34, 50, 101, 152')
    backbone_kwargs = {'num_classes': dataset_train.num_classes(), 'pretrained': True}
    if opts.depth == 50:
        # Only the ResNet-50 constructor is given the deformable-conv options.
        backbone_kwargs['use_dcn'] = use_dcn
        backbone_kwargs['use_depth'] = opts.use_depth
    retinanet = getattr(model, 'resnet{}'.format(opts.depth))(**backbone_kwargs)

    writer = SummaryWriter()
    # Close the writer even when training aborts, so buffered events are
    # flushed and the underlying file handle is released.
    try:
        cuda_available = torch.cuda.is_available()
        if cuda_available:
            retinanet = torch.nn.DataParallel(retinanet.cuda()).cuda()
        else:
            retinanet = torch.nn.DataParallel(retinanet)

        optimizer = optim.Adam(retinanet.parameters(), lr=1e-5)

        scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, verbose=True)

        # Sliding window of the most recent losses, used for the running mean.
        loss_hist = collections.deque(maxlen=500)

        retinanet.train()
        retinanet.module.freeze_bn()

        print('Num training images: {}'.format(len(dataset_train)))

        # Counts every attempted iteration; used as the x-axis for the scalars.
        global_step = 0
        for epoch_num in range(opts.epochs):

            # Evaluation may leave the model in eval mode, so re-arm every epoch.
            retinanet.train()
            retinanet.module.freeze_bn()

            epoch_loss = []

            for iter_num, data in enumerate(dataloader_train):
                # Best effort: a failing batch is reported and skipped so that
                # a single bad sample does not abort the whole run.
                try:
                    optimizer.zero_grad()
                    global_step += 1

                    images = data['img']
                    if cuda_available:
                        images = images.cuda()

                    # The depth map is forwarded only when requested and
                    # actually present in the batch.
                    forward_kwargs = {}
                    if opts.use_depth and 'depth' in data:
                        depth_map = data['depth']
                        forward_kwargs['depth'] = depth_map.cuda() if cuda_available else depth_map

                    classification_loss, regression_loss = retinanet(
                        [images.float(), data['annot']], **forward_kwargs)

                    classification_loss = classification_loss.mean()
                    regression_loss = regression_loss.mean()
                    writer.add_scalar('CLS Loss', classification_loss, global_step)
                    writer.add_scalar('REG Loss', regression_loss, global_step)
                    loss = classification_loss + regression_loss

                    # Nothing to learn from this batch.
                    if bool(loss == 0):
                        continue

                    loss.backward()

                    # Rescale gradients so their total norm is at most 0.1.
                    torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)

                    optimizer.step()

                    loss_hist.append(float(loss))

                    epoch_loss.append(float(loss))

                    print(
                        'Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'.format(
                            epoch_num, iter_num, float(classification_loss), float(regression_loss), np.mean(loss_hist)))

                    del classification_loss
                    del regression_loss
                except Exception as e:
                    print(e)
                    continue

            if opts.dataset == 'coco':

                print('Evaluating dataset')

                coco_eval.evaluate_coco(dataset_val, retinanet)

            elif opts.dataset == 'csv' and opts.csv_val is not None:

                print('Evaluating dataset')

                mAP = csv_eval.evaluate(dataset_val, retinanet)

            scheduler.step(np.mean(epoch_loss))

            torch.save(retinanet.module, '{}_retinanet_{}.pt'.format(opts.dataset, epoch_num))

        retinanet.eval()

        torch.save(retinanet, 'model_final.pt')
    finally:
        writer.close()
Exemplo n.º 11
0
def main(args=None):
    """Train a RetinaNet detector on a CUDA device.

    Parses the command line, builds the COCO or CSV datasets, creates a
    ResNet-backed RetinaNet of the requested depth and trains it with Adam
    under a ReduceLROnPlateau schedule driven by the mean epoch loss.
    After each epoch the model is evaluated (when a validation set exists)
    and ``retinanet.module`` is saved to ``<dataset>_retinanet_<epoch>.pt``;
    the wrapped model is finally saved to ``model_final.pt``.

    The model and every batch are moved to CUDA unconditionally, so a CUDA
    device is required.

    Args:
        args: Optional list of command-line tokens. ``None`` lets argparse
            read ``sys.argv``.

    Raises:
        ValueError: If the dataset type or the ResNet depth is unsupported,
            or if a path required by the chosen dataset type is missing.
    """
    arg_parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')

    # Dataset type.
    arg_parser.add_argument(
        '--dataset', help='Dataset type, must be one of csv or coco.')
    arg_parser.add_argument('--coco_path', help='Path to COCO directory')
    arg_parser.add_argument(
        '--csv_train',
        help='Path to file containing training annotations (see readme)')
    arg_parser.add_argument(
        '--csv_classes',
        help='Path to file containing class list (see readme)')
    arg_parser.add_argument(
        '--csv_val',
        help=
        'Path to file containing validation annotations (optional, see readme)'
    )

    # Selects which pretrained ResNet backbone is used.
    arg_parser.add_argument(
        '--depth',
        help='Resnet depth, must be one of 18, 34, 50, 101, 152',
        type=int,
        default=50)
    arg_parser.add_argument('--epochs',
                            help='Number of epochs',
                            type=int,
                            default=100)

    opts = arg_parser.parse_args(args)

    # Create the datasets. Required paths are validated before any dataset
    # is constructed, so a bad command line fails without touching the disk.
    if opts.dataset == 'coco':

        if opts.coco_path is None:
            raise ValueError('Must provide --coco_path when training on COCO,')

        dataset_train = CocoDataset(
            opts.coco_path,
            set_name='train2017',
            transform=transforms.Compose(
                [Normalizer(), Augmenter(), Resizer()]))
        dataset_val = CocoDataset(
            opts.coco_path,
            set_name='val2017',
            transform=transforms.Compose([Normalizer(), Resizer()]))

    elif opts.dataset == 'csv':

        if opts.csv_train is None:
            raise ValueError('Must provide --csv_train when training on CSV,')

        if opts.csv_classes is None:
            raise ValueError(
                'Must provide --csv_classes when training on CSV,')

        dataset_train = CSVDataset(
            train_file=opts.csv_train,
            class_list=opts.csv_classes,
            transform=transforms.Compose(
                [Normalizer(), Augmenter(), Resizer()]))

        if opts.csv_val is None:
            dataset_val = None
            print('No validation annotations provided.')
        else:
            dataset_val = CSVDataset(
                train_file=opts.csv_val,
                class_list=opts.csv_classes,
                transform=transforms.Compose([Normalizer(), Resizer()]))

    else:
        raise ValueError(
            'Dataset type not understood (must be csv or coco), exiting.')

    # The sampler decides the ordering of the images and the batch size; it
    # hands the loader groups of images rather than single samples.
    sampler = AspectRatioBasedSampler(dataset_train,
                                      batch_size=2,
                                      drop_last=False)
    dataloader_train = DataLoader(dataset_train,
                                  num_workers=3,
                                  collate_fn=collater,
                                  batch_sampler=sampler)

    # NOTE: the validation loader is built but not consumed below; the
    # evaluation helpers receive ``dataset_val`` directly.
    if dataset_val is not None:
        sampler_val = AspectRatioBasedSampler(dataset_val,
                                              batch_size=1,
                                              drop_last=False)
        dataloader_val = DataLoader(dataset_val,
                                    num_workers=3,
                                    collate_fn=collater,
                                    batch_sampler=sampler_val)

    # Create the model: look up model.resnet<depth> after validating depth.
    if opts.depth not in (18, 34, 50, 101, 152):
        raise ValueError(
            'Unsupported model depth, must be one of 18, 34, 50, 101, 152')
    build_backbone = getattr(model, 'resnet{}'.format(opts.depth))
    retinanet = build_backbone(num_classes=dataset_train.num_classes(),
                               pretrained=True)

    # Move to the GPU and wrap for multi-GPU execution.
    retinanet = torch.nn.DataParallel(retinanet.cuda()).cuda()

    optimizer = optim.Adam(retinanet.parameters(), lr=1e-5)

    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     patience=3,
                                                     verbose=True)

    # Double-ended queue capped at 500 entries: a sliding window of the most
    # recent losses, used for the running mean printed during training.
    loss_hist = collections.deque(maxlen=500)

    # train() puts BatchNorm and Dropout layers in training mode (eval()
    # puts them in inference mode); freeze_bn() is applied right after.
    retinanet.train()
    retinanet.module.freeze_bn()

    print('Num training images: {}'.format(len(dataset_train)))

    for epoch_num in range(opts.epochs):

        # Evaluation may leave the model in eval mode, so re-arm every epoch.
        retinanet.train()
        retinanet.module.freeze_bn()

        epoch_loss = []

        for iter_num, data in enumerate(dataloader_train):
            # Best effort: a failing batch is reported and skipped so that a
            # single bad sample does not abort the whole run.
            try:
                optimizer.zero_grad()

                classification_loss, regression_loss = retinanet(
                    [data['img'].cuda().float(), data['annot']])

                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()

                loss = classification_loss + regression_loss

                # Nothing to learn from this batch.
                if bool(loss == 0):
                    continue

                # Back-propagation.
                loss.backward()

                # Gradient clipping: rescale the gradients so that their
                # total norm does not exceed 0.1.
                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)

                # Apply the update; valid once backward() has populated the
                # gradients.
                optimizer.step()

                loss_hist.append(float(loss))

                epoch_loss.append(float(loss))

                print(
                    'Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'
                    .format(epoch_num, iter_num, float(classification_loss),
                            float(regression_loss), np.mean(loss_hist)))

                del classification_loss
                del regression_loss
            except Exception as e:
                print(e)
                continue

        if opts.dataset == 'coco':

            print('Evaluating dataset')

            coco_eval.evaluate_coco(dataset_val, retinanet)

        elif opts.dataset == 'csv' and opts.csv_val is not None:

            print('Evaluating dataset')

            mAP = csv_eval.evaluate(dataset_val, retinanet)

        # optimizer.step() runs once per mini-batch and updates the weights;
        # scheduler.step() runs once per epoch and only adjusts the learning
        # rate.
        scheduler.step(np.mean(epoch_loss))

        torch.save(retinanet.module,
                   '{}_retinanet_{}.pt'.format(opts.dataset, epoch_num))

    retinanet.eval()

    torch.save(retinanet, 'model_final.pt')
Exemplo n.º 12
0
def main(args=None):
    parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')

    parser.add_argument("--data_config",
                        type=str,
                        default="data/retina_label/custom.data",
                        help="path to data config file")
    parser.add_argument(
        "--n_cpu",
        type=int,
        default=8,
        help="number of cpu threads to use during batch generation")
    parser.add_argument(
        '--depth',
        help='Resnet depth, must be one of 18, 34, 50, 101, 152',
        type=int,
        default=50)
    parser.add_argument('--epochs',
                        help='Number of epochs',
                        type=int,
                        default=15)
    parser.add_argument("--batch_size",
                        type=int,
                        default=4,
                        help="size of each image batch")
    parser.add_argument('--pretrained_model',
                        type=str,
                        default=None,
                        help='load pretrained model')
    parser.add_argument('--optim_scheduler',
                        type=str,
                        default=None,
                        help='load pretrained optimizer and scheduler')
    parser.add_argument(
        "--attack_type",
        type=str,
        default="Normal",
        help="type of adversarial attack; Normal or FGSM or PGD")
    parser.add_argument("--eps",
                        type=str,
                        default='2',
                        help="epsilon value for FGSM")
    parser.add_argument("--alpha", type=float, default=0.5)
    parser.add_argument(
        "--sign_grad",
        type=bool,
        default=True,
        help="whether use signed gradient and alpha=2.5*eps/iter in PGD")
    parser.add_argument("--iterations", type=int, default=10)
    parser.add_argument("--irl", type=int, default=0)
    parser.add_argument("--irl_noise_type", type=str, default='in_domain')
    parser.add_argument("--irl_loss_type", type=int, default=1)
    parser.add_argument("--irl_attack_type",
                        type=str,
                        default='fgsm',
                        help="type of attack to be implemented in small case")
    parser.add_argument("--irl_alpha", type=float, default='0.8')
    parser.add_argument("--irl_beta", type=float, default='0.2')
    parser.add_argument("--irl_gamma", type=float, default='1')
    parser.add_argument("--irl_alt", type=int, default=0)
    parser.add_argument(
        "--irl_avg",
        type=int,
        default=0,
        help="Set true to average over all layers in irl distance loss")
    parser.add_argument(
        "--mix_thre",
        type=float,
        default=0.5,
        help=
        "percentage of clean data in each mixed batch; range:[0,1], the larger, the more clean data there are in each batch"
    )
    parser.add_argument("--checkpoint_interval",
                        type=int,
                        default=1,
                        help="interval between saving model weights")
    parser.add_argument("--evaluation_interval",
                        type=int,
                        default=1,
                        help="interval evaluations on validation set")
    parser.add_argument("--evaluation_attack_interval",
                        type=int,
                        default=3,
                        help="interval evaluations on validation set")
    parser.add_argument("--evalute_attacktype",
                        type=str,
                        default='FGSM',
                        help="FGSM/Randn/Normal")
    parser = parser.parse_args(args)
    print(parser)
    eps = convert_eps(parser.eps)
    training_name = train_name(parser, eps)
    os.makedirs(f"checkpoints/retina/{training_name}", exist_ok=False)
    print(f"checkpoints stored as {training_name}")
    # Get data configuration
    data_config = parse_data_config(parser.data_config)
    train_path = data_config["train"]
    val_path = data_config["val"]
    class_names = data_config["names"]

    dataset_train = CSVDataset(train_file=train_path,
                               class_list=class_names,
                               transform=transforms.Compose(
                                   [Augmenter(), Resizer()]))

    dataset_val = CSVDataset(train_file=val_path,
                             class_list=class_names,
                             transform=transforms.Compose([Resizer()]))

    sampler = AspectRatioBasedSampler(dataset_train,
                                      batch_size=parser.batch_size,
                                      drop_last=False)
    dataloader_train = DataLoader(dataset_train,
                                  num_workers=parser.n_cpu,
                                  collate_fn=collater,
                                  batch_sampler=sampler)

    # sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=parser.batch_size, drop_last=False)
    # dataloader_val = DataLoader(dataset_val, num_workers=parser.n_cpu, collate_fn=collater, batch_sampler=sampler_val)
    if parser.pretrained_model:
        retinanet = torch.load(parser.pretrained_model)
    else:
        if parser.depth == 18:
            retinanet = model.resnet18(num_classes=dataset_train.num_classes(),
                                       pretrained=True)
        elif parser.depth == 34:
            retinanet = model.resnet34(num_classes=dataset_train.num_classes(),
                                       pretrained=True)
        elif parser.depth == 50:
            retinanet = model.resnet50(num_classes=dataset_train.num_classes(),
                                       pretrained=True)
        elif parser.depth == 101:
            retinanet = model.resnet101(
                num_classes=dataset_train.num_classes(), pretrained=True)
        elif parser.depth == 152:
            retinanet = model.resnet152(
                num_classes=dataset_train.num_classes(), pretrained=True)
        else:
            raise ValueError(
                'Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    if torch.cuda.is_available():
        retinanet = retinanet.cuda()
        retinanet = torch.nn.DataParallel(retinanet).cuda()
    else:
        retinanet = torch.nn.DataParallel(retinanet)

    use_irl = bool(parser.irl)
    irl_alt = bool(parser.irl_alt)
    irl_avg = bool(parser.irl_avg)
    if use_irl:
        irl_obj = IRL(noise_types=[parser.irl_noise_type],
                      adv_attack_type=parser.irl_attack_type,
                      model_type='retina',
                      loss_type=parser.irl_loss_type,
                      epsilon=eps,
                      alpha=parser.alpha,
                      iterations=parser.iterations)
        act_file_name = ('retina_fnl_layers-resnet4_loss-type' +
                         str(parser.irl_loss_type) + '_' +
                         parser.irl_noise_type + '_alt' + str(parser.irl_alt))
        act_file_name += f"-alpha{parser.irl_alpha}-beta{parser.irl_beta}-gamma{parser.irl_gamma}_activations.txt"
        print("Saving activations in: ", str(act_file_name))

    retinanet.training = True

    optimizer = optim.Adam(retinanet.parameters(), lr=1e-5)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     patience=3,
                                                     verbose=True)

    if parser.optim_scheduler is not None:
        optim_scheduler = torch.load(parser.optim_scheduler)
        optimizer.load_state_dict(optim_scheduler['optimizer'])
        scheduler.load_state_dict(optim_scheduler['scheduler'])
    loss_hist = collections.deque(maxlen=500)

    retinanet.train()
    retinanet.module.freeze_bn()

    print('Num training images: {}'.format(len(dataset_train)))
    print('Starting training.')
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    for epoch_num in range(parser.epochs):
        retinanet.train()
        retinanet.module.freeze_bn()
        epoch_loss = []

        for iter_num, data in enumerate(dataloader_train):

            optimizer.zero_grad()
            batch_mixed = mix_batch(retinanet,
                                    data['img'],
                                    data['annot'],
                                    data['img'].shape[0],
                                    epsilon=eps,
                                    alpha=parser.alpha,
                                    mix_thre=parser.mix_thre,
                                    attack_type=parser.attack_type,
                                    model_type='retina',
                                    sign_grad=parser.sign_grad)
            if use_irl and (not irl_alt or epoch_num % 2 == 1):
                classification_loss, regression_loss, activations = retinanet(
                    [Variable(batch_mixed.to(device)), data['annot']],
                    send_activations=True)
                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()
                noise_loss, distance_loss = irl_obj.compute_losses(
                    model=retinanet,
                    images=data['img'],
                    targets=data['annot'],
                    activations=activations,
                    epoch_num=epoch_num,
                    batch_num=iter_num,
                    training_name=act_file_name,
                    avg_layers=irl_avg)
                regular_loss = classification_loss + regression_loss
                loss = parser.irl_alpha * regular_loss + parser.irl_beta * noise_loss + parser.irl_gamma * distance_loss
            else:
                classification_loss, regression_loss = retinanet(
                    [Variable(batch_mixed.to(device)), data['annot']])
                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()

                loss = classification_loss + regression_loss

            if bool(loss == 0):
                continue

            loss.backward()

            torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)

            optimizer.step()

            loss_hist.append(float(loss))

            epoch_loss.append(float(loss))

            if iter_num % 500 == 0:
                if use_irl and (not irl_alt or epoch_num % 2 == 1):
                    print(
                        'Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Noise Loss: {:1.5f} | Distance Loss: {:1.5f} | Running loss: {:1.5f}'
                        .format(epoch_num, iter_num,
                                float(parser.irl_alpha * classification_loss),
                                float(parser.irl_alpha * regression_loss),
                                float(parser.irl_beta * noise_loss),
                                float(parser.irl_gamma * distance_loss),
                                np.mean(loss_hist)))
                    del noise_loss
                    del distance_loss
                else:
                    print(
                        'Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'
                        .format(epoch_num, iter_num,
                                float(classification_loss),
                                float(regression_loss), np.mean(loss_hist)))

            del classification_loss
            del regression_loss

        scheduler.step(np.mean(epoch_loss))

        if epoch_num % parser.checkpoint_interval == 0:
            torch.save(
                retinanet.module,
                f"checkpoints/retina/{training_name}/ckpt_{epoch_num}.pt")
            torch.save(
                {
                    'optimizer': optimizer.state_dict(),
                    'scheduler': scheduler.state_dict()
                },
                f"checkpoints/retina/{training_name}/optim_scheduler_{epoch_num}.pt"
            )

        if epoch_num % parser.evaluation_interval == 0:
            print("\n------Evaluating model------")
            AP, mAP = csv_eval.evaluate(dataset_val, retinanet)
            print('Epoch: {} | AP: {} | mAP: {}'.format(epoch_num, AP, mAP))
            # write logs of the model to log.txt, format: epoch number, mAP, AP per class
            print(
                f"{epoch_num},{mAP},{AP[0][0]},{AP[1][0]},{AP[2][0]},{AP[3][0]},{AP[4][0]},{AP[5][0]},{AP[6][0]},{AP[7][0]},{AP[8][0]},{AP[9][0]}\n"
            )
            with open(f"checkpoints/retina/{training_name}/log.txt",
                      'a+') as log:
                log.write(
                    f"{epoch_num},{mAP},{AP[0][0]},{AP[1][0]},{AP[2][0]},{AP[3][0]},{AP[4][0]},{AP[5][0]},{AP[6][0]},{AP[7][0]},{AP[8][0]},{AP[9][0]}\n"
                )

        # Evaluating the model on noise now
        if parser.evalute_attacktype and epoch_num % parser.evaluation_attack_interval == 0:
            print("\n-------Evaluating on noise-----")
            AP_n, mAP_n = csv_eval.evaluate(
                dataset_val,
                retinanet,
                perturbed=parser.evalute_attacktype,
                _epsilon=eps)
            print('Noise Epoch: {} | AP: {} | mAP: {}'.format(
                epoch_num, AP_n, mAP_n))
            with open(f"checkpoints/retina/{training_name}/log_attack.txt",
                      'a+') as log:
                log.write(
                    f"{epoch_num},{mAP_n},{AP_n[0][0]},{AP_n[1][0]},{AP_n[2][0]},{AP_n[3][0]},{AP_n[4][0]},{AP_n[5][0]},{AP_n[6][0]},{AP_n[7][0]},{AP_n[8][0]},{AP_n[9][0]}\n"
                )
Exemplo n.º 13
0
def main(args=None):
    """Train a RetinaNet detector for one numbered training session.

    Parses the command line, builds the COCO or CSV datasets, creates a
    RetinaNet of the requested ResNet depth, and trains it with Adam and a
    ``ReduceLROnPlateau`` scheduler. The model is evaluated after every
    epoch when a validation set is available, and ``state_dict``
    checkpoints are written to ``models/session_XX``.

    Args:
        args: Optional list of command-line tokens. ``None`` lets argparse
            read ``sys.argv``.

    Raises:
        ValueError: If ``--s`` is missing, the dataset type is unknown, a
            path required by the chosen dataset type is missing, or
            ``--depth`` is not one of the supported values.
        AssertionError: If the ``models`` or ``logs`` folder is missing
            from the current working directory.
    """
    parser = argparse.ArgumentParser(description='Simple training script for training a RetinaNet network.')
    parser.add_argument('--s', help='training session', type=int)
    parser.add_argument('--bs', help='batch size', type=int, default=4)
    parser.add_argument('--lr', help='learning rate', type=float, default=0.001)
    parser.add_argument('--save_int', help='interval for saving model', type=int)
    parser.add_argument('--dataset', help='Dataset type, must be one of csv or coco.')
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument('--csv_train', help='Path to file containing training annotations (see readme)')
    parser.add_argument('--csv_classes', help='Path to file containing class list (see readme)')
    parser.add_argument('--csv_val', help='Path to file containing validation annotations (optional, see readme)')
    parser.add_argument('--depth', help='Resnet depth, must be one of 18, 34, 50, 101, 152', type=int, default=50)
    parser.add_argument('--epochs', help='Number of epochs', type=int, default=100)
    parser.add_argument('--use_tb', help='whether to use tensorboard', action='store_true')
    parser.add_argument('--use_aug', help='whether to use data augmentation', action='store_true')

    parser = parser.parse_args(args)

    session = parser.s
    if session is None:
        # Fail with a readable message instead of the TypeError that
        # formatting None with '{:02d}' would raise.
        raise ValueError('Must provide --s (training session number).')
    session_dir = 'session_{:02d}'.format(session)
    assert os.path.isdir('models'), '[ERROR] models folder not exist'
    assert os.path.isdir('logs'), '[ERROR] logs folder not exist'
    model_dir = os.path.join('models', session_dir)
    logs_dir = os.path.join('logs', session_dir)
    os.makedirs(model_dir, exist_ok=True)
    os.makedirs(logs_dir, exist_ok=True)

    # Optional tensorboard logger. It writes under 'logs'; the session is
    # encoded in the scalar tag used further down.
    tb_writer = SummaryWriter('logs') if parser.use_tb else None

    # Create the datasets
    if parser.dataset == 'coco':

        if parser.coco_path is None:
            raise ValueError('Must provide --coco_path when training on COCO,')

        if parser.use_aug:
            train_transform = transforms.Compose([Normalizer(), Augmenter(), ToTensor()])
        else:
            train_transform = transforms.Compose([Normalizer(), ToTensor()])

        dataset_train = CocoDataset(parser.coco_path, set_name='train2017', transform=train_transform)
        dataset_val = CocoDataset(parser.coco_path, set_name='val2017',
                                  transform=transforms.Compose([Normalizer(), ToTensor()]))

    elif parser.dataset == 'csv':

        if parser.csv_train is None:
            raise ValueError('Must provide --csv_train when training on CSV,')

        if parser.csv_classes is None:
            raise ValueError('Must provide --csv_classes when training on CSV,')

        # The CSV branch always augments the training data; --use_aug is
        # only consulted in the COCO branch.
        dataset_train = CSVDataset(train_file=parser.csv_train, class_list=parser.csv_classes,
                                   transform=transforms.Compose([Normalizer(), Augmenter(), ToTensor()]))

        if parser.csv_val is None:
            dataset_val = None
            print('No validation annotations provided.')
        else:
            # NOTE(review): the validation set also goes through Augmenter(),
            # unlike the COCO validation set above - confirm this is intended.
            dataset_val = CSVDataset(train_file=parser.csv_val, class_list=parser.csv_classes,
                                     transform=transforms.Compose([Normalizer(), Augmenter(), ToTensor()]))

    else:
        raise ValueError('Dataset type not understood (must be csv or coco), exiting.')

    sampler = AspectRatioBasedSampler(dataset_train, batch_size=parser.bs, drop_last=False)
    dataloader_train = DataLoader(dataset_train, num_workers=0, collate_fn=collater, batch_sampler=sampler)
    # No validation DataLoader is built: both evaluation helpers below take
    # the dataset object directly.

    print('# classes: {}'.format(dataset_train.num_classes()))

    # Create the model
    if parser.depth not in (18, 34, 50, 101, 152):
        raise ValueError('Unsupported model depth, must be one of 18, 34, 50, 101, 152')
    build_backbone = getattr(model, 'resnet{}'.format(parser.depth))
    retinanet = build_backbone(num_classes=dataset_train.num_classes(), pretrained=True)

    # Use the GPU when one is present. The DataParallel wrapper is kept in
    # both cases so that `retinanet.module` is always available.
    use_cuda = torch.cuda.is_available()
    if use_cuda:
        retinanet = retinanet.cuda()
    retinanet = torch.nn.DataParallel(retinanet)
    if use_cuda:
        retinanet = retinanet.cuda()

    retinanet.training = True

    optimizer = optim.Adam(retinanet.parameters(), lr=parser.lr)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, verbose=True)

    loss_hist = collections.deque(maxlen=500)

    retinanet.train()
    retinanet.module.freeze_bn()

    print('Num training images: {}'.format(len(dataset_train)))

    iter_per_epoch = len(dataloader_train)

    for epoch_num in range(parser.epochs):

        retinanet.train()
        retinanet.module.freeze_bn()

        epoch_loss = []

        for iter_num, data in enumerate(dataloader_train):
            # Best effort: a failing batch is reported and skipped so that
            # one bad sample does not abort the whole run.
            try:
                optimizer.zero_grad()
                assert data['img'][0].shape[0] == 3, '[ERROR] data first dim should be 3! ({})'.format(data['img'][0].shape)
                # data['img']: (B, C, H, W)
                # data['annot']: [x1, y1, x2, y2, class_id]
                images = data['img'].cuda() if use_cuda else data['img']
                classification_loss, regression_loss = retinanet([images.float(), data['annot']])

                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()

                loss = classification_loss + regression_loss

                if bool(loss == 0):
                    continue

                loss.backward()

                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)

                optimizer.step()

                loss_hist.append(float(loss))
                epoch_loss.append(float(loss))

                # Logging interval in iterations; 1 means every iteration.
                if (iter_num + 1) % 1 == 0:
                    print(
                        'Epoch: {} | Iteration: {} | Total loss: {:1.5f} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'.format(
                            epoch_num, iter_num, float(loss), float(classification_loss), float(regression_loss), np.mean(loss_hist)
                        )
                    )

                # update tensorboard
                if tb_writer is not None:
                    crt_iter = epoch_num * iter_per_epoch + (iter_num + 1)
                    tb_dict = {
                        'total_loss': float(loss),
                        'classification_loss': float(classification_loss),
                        'regression_loss': float(regression_loss)
                    }
                    tb_writer.add_scalars('session_{:02d}/loss'.format(session), tb_dict, crt_iter)

                del classification_loss
                del regression_loss
            except Exception as e:
                print(e)
                continue

        if parser.dataset == 'coco':

            print('Evaluating dataset')
            coco_eval.evaluate_coco(dataset_val, retinanet)

        elif parser.dataset == 'csv' and parser.csv_val is not None:

            print('Evaluating dataset')
            csv_eval.evaluate(dataset_val, retinanet)

        # np.mean([]) is NaN; do not feed that to the scheduler when every
        # batch of the epoch was skipped.
        if epoch_loss:
            scheduler.step(np.mean(epoch_loss))

        # --save_int is optional: without it only the final checkpoint below
        # is written (previously this line raised TypeError after epoch 0).
        if parser.save_int and (epoch_num + 1) % parser.save_int == 0:
            # retinanet is the DataParallel wrapper; the underlying network
            # is retinanet.module, and its state_dict is what gets saved.
            torch.save(retinanet.module.state_dict(), os.path.join(model_dir, 'retinanet_s{:02d}_e{:03d}.pth'.format(session, epoch_num)))

    if tb_writer is not None:
        tb_writer.close()

    retinanet.eval()
    # Final checkpoint, named after the last epoch index.
    torch.save(retinanet.module.state_dict(), os.path.join(model_dir, 'retinanet_s{:02d}_e{:03d}.pth'.format(session, epoch_num)))
Exemplo n.º 14
0
def main(args=None):
    """Train a RetinaNet variant that consumes a window of consecutive frames.

    Parses the command line, builds the COCO or CSV datasets, creates a
    RetinaNet of the requested ResNet depth and trains it with Adam and a
    ``ReduceLROnPlateau`` scheduler. Batches of one image are read in
    sequential order; the most recent ``frame_num`` images are concatenated
    and passed to the network together with the newest image and its
    annotation. Losses are logged through ``SummaryWriter('log')``, one
    checkpoint per epoch goes to ``checkpoint/`` and the final model to
    ``save/model_final.pt``.

    Args:
        args: Optional list of command-line tokens. ``None`` lets argparse
            read ``sys.argv``.

    Raises:
        ValueError: If the dataset type is unknown, a path required by the
            chosen dataset type is missing, or ``--depth`` is unsupported.
    """
    import os  # local import: only needed to create the output directories

    parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')

    parser.add_argument('--dataset',
                        help='Dataset type, must be one of csv or coco.')
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument(
        '--csv_train',
        help='Path to file containing training annotations (see readme)')
    parser.add_argument('--csv_classes',
                        help='Path to file containing class list (see readme)')
    parser.add_argument(
        '--csv_val',
        help=
        'Path to file containing validation annotations (optional, see readme)'
    )

    parser.add_argument(
        '--depth',
        help='Resnet depth, must be one of 18, 34, 50, 101, 152',
        type=int,
        default=50)
    parser.add_argument('--epochs',
                        help='Number of epochs',
                        type=int,
                        default=25)

    parser = parser.parse_args(args)

    # Create the datasets
    if parser.dataset == 'coco':

        if parser.coco_path is None:
            raise ValueError('Must provide --coco_path when training on COCO,')

        dataset_train = CocoDataset(parser.coco_path,
                                    set_name='train2017',
                                    transform=transforms.Compose(
                                        [Normalizer(),
                                         Augmenter(),
                                         Resizer()]))
        dataset_val = CocoDataset(parser.coco_path,
                                  set_name='val2017',
                                  transform=transforms.Compose(
                                      [Normalizer(), Resizer()]))

    elif parser.dataset == 'csv':

        if parser.csv_train is None:
            raise ValueError('Must provide --csv_train when training on CSV,')

        if parser.csv_classes is None:
            raise ValueError(
                'Must provide --csv_classes when training on CSV,')

        dataset_train = CSVDataset(train_file=parser.csv_train,
                                   class_list=parser.csv_classes,
                                   transform=transforms.Compose(
                                       [Normalizer(),
                                        Augmenter(),
                                        Resizer()]))

        if parser.csv_val is None:
            dataset_val = None
            print('No validation annotations provided.')
        else:
            dataset_val = CSVDataset(train_file=parser.csv_val,
                                     class_list=parser.csv_classes,
                                     transform=transforms.Compose(
                                         [Normalizer(),
                                          Resizer()]))

    else:
        raise ValueError(
            'Dataset type not understood (must be csv or coco), exiting.')

    # Sequential, single-image batches: the frame window built in the
    # training loop relies on images arriving in dataset order.
    train_batches = torch.utils.data.BatchSampler(
        torch.utils.data.SequentialSampler(dataset_train),
        batch_size=1,
        drop_last=True)
    dataloader_train = DataLoader(dataset_train,
                                  num_workers=10,
                                  collate_fn=collater,
                                  batch_sampler=train_batches)

    # The validation loader only exists when there is a validation dataset
    # (a CSV run without --csv_val has none).
    dataloader_val = None
    if dataset_val is not None:
        val_batches = torch.utils.data.BatchSampler(
            torch.utils.data.SequentialSampler(dataset_val),
            batch_size=1,
            drop_last=True)
        dataloader_val = DataLoader(dataset_val,
                                    num_workers=10,
                                    collate_fn=collater,
                                    batch_sampler=val_batches)

    # Create the model
    if parser.depth not in (18, 34, 50, 101, 152):
        raise ValueError(
            'Unsupported model depth, must be one of 18, 34, 50, 101, 152')
    build_backbone = getattr(model, 'resnet{}'.format(parser.depth))
    retinanet = build_backbone(num_classes=dataset_train.num_classes(),
                               pretrained=True)

    # Use the GPU when one is present. The DataParallel wrapper is kept in
    # both cases so that `retinanet.module` is always available.
    use_cuda = torch.cuda.is_available()
    if use_cuda:
        retinanet = retinanet.cuda()
    retinanet = torch.nn.DataParallel(retinanet)
    if use_cuda:
        retinanet = retinanet.cuda()

    def to_device(tensor):
        """Move *tensor* to the GPU when available and cast it to float."""
        if use_cuda:
            tensor = tensor.cuda()
        return tensor.float()

    retinanet.training = True

    # ADAM optimizer
    optimizer = optim.Adam(retinanet.parameters(), lr=1e-5)

    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     patience=3,
                                                     verbose=True)

    loss_hist = collections.deque(maxlen=500)

    retinanet.train()
    retinanet.module.freeze_bn()

    print('Num training images: {}'.format(len(dataset_train)))

    # torch.save does not create missing directories; make them up front so
    # that the first epoch's work is not lost on a missing folder.
    os.makedirs('checkpoint', exist_ok=True)
    os.makedirs('save', exist_ok=True)

    # using tensorboardX to show training process
    writer = SummaryWriter('log')

    iter_sum = 0
    frame_num = 8

    for epoch_num in range(parser.epochs):

        # Sliding windows holding the latest `frame_num` images/annotations.
        frame_list = collections.deque(maxlen=frame_num)
        anno_list = collections.deque(maxlen=frame_num)

        retinanet.train()
        retinanet.module.freeze_bn()

        epoch_loss = []

        for index, data in enumerate(dataloader_train):
            # Best effort: a failing batch is reported and skipped.
            try:

                frame_list.append(data['img'])
                anno_list.append(data['annot'])

                # NOTE(review): these index ranges are hard-coded; presumably
                # they skip a warm-up period and a boundary between two
                # sequences of one specific dataset - confirm before reuse.
                if index < 31:
                    continue
                if index >= 697 and index <= 697 + 32:
                    continue

                # real_frame is the frame used for detection:
                # the newest frame in the window
                real_frame = frame_list[-1]

                # the annotation for real_frame
                annot = anno_list[-1]

                # all frames currently in the window, concatenated on dim 0
                stacked_frames = torch.cat(list(frame_list), dim=0)

                optimizer.zero_grad()

                classification_loss, regression_loss = retinanet([
                    to_device(stacked_frames),
                    to_device(real_frame),
                    to_device(annot)
                ])

                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()

                loss = classification_loss + regression_loss

                if bool(loss == 0):
                    continue

                loss.backward()

                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)

                optimizer.step()

                loss_hist.append(float(loss))

                epoch_loss.append(float(loss))

                writer.add_scalar('loss_hist', np.mean(loss_hist), iter_sum)
                writer.add_scalar('classification_loss',
                                  float(classification_loss), iter_sum)
                writer.add_scalar('regression_loss', float(regression_loss),
                                  iter_sum)
                writer.add_scalar('loss', float(loss), iter_sum)

                print(
                    'Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'
                    .format(epoch_num, index, float(classification_loss),
                            float(regression_loss), np.mean(loss_hist)))

                del classification_loss
                del regression_loss
                iter_sum = iter_sum + 1
            except Exception as e:
                print(e)
                continue

        if parser.dataset == 'coco':

            print('Evaluating dataset')

            # evaluate coco
            coco_eval.evaluate_coco(dataset_val, dataloader_val, retinanet,
                                    frame_num)

        elif parser.dataset == 'csv' and parser.csv_val is not None:

            print('Evaluating dataset')

            csv_eval.evaluate(dataset_val, retinanet)

        # np.mean([]) is NaN; with the first 31 batches always skipped an
        # empty epoch is possible on small datasets, so guard the scheduler.
        if epoch_loss:
            scheduler.step(np.mean(epoch_loss))

        # Per-epoch checkpoint of the unwrapped network.
        torch.save(
            retinanet.module,
            'checkpoint/{}_retinanet_{}.pt'.format(parser.dataset, epoch_num))

    retinanet.eval()

    # The final file stores the DataParallel wrapper itself, not
    # retinanet.module as the per-epoch checkpoints do.
    torch.save(retinanet, 'save/model_final.pt')

    writer.close()
def main(args=None):
    """Train a RetinaNet detector and save periodic checkpoints.

    Parses the command line, builds the COCO or CSV datasets, creates a
    RetinaNet of the requested ResNet depth and trains it with Adam and a
    ``ReduceLROnPlateau`` scheduler. The model is evaluated after every
    epoch when a validation set is available. Every ``--iter_num`` epochs
    the unwrapped network is saved under ``--model_save_path``; the final
    model is saved there as ``model_final.pt``.

    Args:
        args: Optional list of command-line tokens. ``None`` lets argparse
            read ``sys.argv``.

    Raises:
        ValueError: If the dataset type is unknown, a path required by the
            chosen dataset type is missing, or ``--depth`` is unsupported.
    """
    import os  # local import: used for the checkpoint directory and paths

    parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')

    parser.add_argument('--dataset',
                        type=str,
                        default='csv',
                        help='Dataset type, must be one of csv or coco.')
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument(
        '--csv_train',
        type=str,
        default=
        r'/usr/idip/idip/liuan/project/pytorch_retinanet/RetinaNet-PFA-SPANet/train.csv',
        help='Path to file containing training annotations (see readme)')
    parser.add_argument(
        '--csv_classes',
        type=str,
        default=
        r'/usr/idip/idip/liuan/project/pytorch_retinanet/RetinaNet-PFA-SPANet/class.csv',
        help='Path to file containing class list (see readme)')
    parser.add_argument(
        '--csv_val',
        type=str,
        default=
        r'/usr/idip/idip/liuan/project/pytorch_retinanet/RetinaNet-PFA-SPANet/val.csv',
        help=
        'Path to file containing validation annotations (optional, see readme)'
    )
    parser.add_argument(
        '--model_save_path',
        type=str,
        default=
        r'/usr/idip/idip/liuan/project/pytorch_retinanet/RetinaNet-PFA-SPANet/model/resnet101+PFA+CFPN/',
        help='Path to save model')

    parser.add_argument(
        '--depth',
        help='Resnet depth, must be one of 18, 34, 50, 101, 152',
        type=int,
        default=101)
    parser.add_argument('--epochs',
                        help='Number of epochs',
                        type=int,
                        default=150)
    parser.add_argument('--iter_num',
                        help='Iter number of saving checkpoint',
                        type=int,
                        default=5)

    parser = parser.parse_args(args)

    # Create the datasets
    if parser.dataset == 'coco':

        if parser.coco_path is None:
            raise ValueError('Must provide --coco_path when training on COCO,')

        dataset_train = CocoDataset(parser.coco_path,
                                    set_name='train2017',
                                    transform=transforms.Compose(
                                        [Normalizer(),
                                         Augmenter(),
                                         Resizer()]))
        dataset_val = CocoDataset(parser.coco_path,
                                  set_name='val2017',
                                  transform=transforms.Compose(
                                      [Normalizer(), Resizer()]))

    elif parser.dataset == 'csv':

        if parser.csv_train is None:
            raise ValueError('Must provide --csv_train when training on CSV,')

        if parser.csv_classes is None:
            raise ValueError(
                'Must provide --csv_classes when training on CSV,')

        dataset_train = CSVDataset(train_file=parser.csv_train,
                                   class_list=parser.csv_classes,
                                   transform=transforms.Compose(
                                       [Normalizer(),
                                        Augmenter(),
                                        Resizer()]))

        if parser.csv_val is None:
            dataset_val = None
            print('No validation annotations provided.')
        else:
            dataset_val = CSVDataset(train_file=parser.csv_val,
                                     class_list=parser.csv_classes,
                                     transform=transforms.Compose(
                                         [Normalizer(),
                                          Resizer()]))
    else:
        raise ValueError(
            'Dataset type not understood (must be csv or coco), exiting.')

    sampler = AspectRatioBasedSampler(dataset_train,
                                      batch_size=2,
                                      drop_last=False)
    # Wrap the custom Dataset into batched tensors (batch size and ordering
    # come from the sampler) for use in the training loop below.
    dataloader_train = DataLoader(dataset_train,
                                  num_workers=3,
                                  collate_fn=collater,
                                  batch_sampler=sampler)
    # No validation DataLoader is built: both evaluation helpers below take
    # the dataset object directly.

    # Create the model
    if parser.depth not in (18, 34, 50, 101, 152):
        raise ValueError(
            'Unsupported model depth, must be one of 18, 34, 50, 101, 152')
    build_backbone = getattr(model, 'resnet{}'.format(parser.depth))
    retinanet = build_backbone(num_classes=dataset_train.num_classes(),
                               pretrained=True)

    # Use the GPU when one is present. The DataParallel wrapper is kept in
    # both cases so that `retinanet.module` is always available.
    use_cuda = torch.cuda.is_available()
    if use_cuda:
        retinanet = retinanet.cuda()
    retinanet = torch.nn.DataParallel(retinanet)
    if use_cuda:
        retinanet = retinanet.cuda()

    retinanet.training = True

    optimizer = optim.Adam(retinanet.parameters(), lr=1e-5)

    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     patience=3,
                                                     verbose=True)

    loss_hist = collections.deque(maxlen=500)

    retinanet.train()
    retinanet.module.freeze_bn()

    print('Num training images: {}'.format(len(dataset_train)))

    # torch.save does not create missing directories; make the target
    # folder up front so that no training time is lost at the first save.
    os.makedirs(parser.model_save_path, exist_ok=True)

    for epoch_num in range(parser.epochs):

        retinanet.train()
        retinanet.module.freeze_bn()

        epoch_loss = []

        for iter_num, data in enumerate(dataloader_train):
            optimizer.zero_grad()

            images = data['img'].cuda() if use_cuda else data['img']
            classification_loss, regression_loss = retinanet(
                [images.float(), data['annot']])

            classification_loss = classification_loss.mean()
            regression_loss = regression_loss.mean()

            loss = classification_loss + regression_loss

            if bool(loss == 0):
                continue

            loss.backward()

            torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)

            optimizer.step()

            loss_hist.append(float(loss))

            epoch_loss.append(float(loss))

            print(
                'Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'
                .format(epoch_num, iter_num, float(classification_loss),
                        float(regression_loss), np.mean(loss_hist)))

            del classification_loss
            del regression_loss

        if parser.dataset == 'coco':

            print('Evaluating dataset')

            coco_eval.evaluate_coco(dataset_val, retinanet)

        elif parser.dataset == 'csv' and parser.csv_val is not None:

            print('Evaluating dataset')

            csv_eval.evaluate(dataset_val, retinanet)

        # np.mean([]) is NaN; do not feed that to the scheduler when every
        # batch of the epoch was skipped.
        if epoch_loss:
            scheduler.step(np.mean(epoch_loss))

        # Checkpoint every --iter_num epochs (counted from 1); a value of 0
        # disables periodic checkpoints instead of dividing by zero.
        if parser.iter_num and (epoch_num + 1) % parser.iter_num == 0:
            torch.save(
                retinanet.module,
                os.path.join(
                    parser.model_save_path,
                    '{}_retinanet_{}.pt'.format(parser.dataset, epoch_num)))

    retinanet.eval()

    # The final file stores the DataParallel wrapper itself, not
    # retinanet.module as the periodic checkpoints do.
    torch.save(retinanet, os.path.join(parser.model_save_path,
                                       'model_final.pt'))
Exemplo n.º 16
0
def main():
    """Train a RetinaNet detector and evaluate it after every epoch.

    Command-line options come from ``parse()``; the parsed namespace is stored
    in the module-level ``args``. The function then

    1. creates ``args.logdir`` and a file logger (``train.log``) inside it;
    2. tries ``init_distributed_mode(args)`` and falls back to a single
       process (rank 0) when that raises ``KeyError``; ``--dist_mode DP``
       always runs as rank 0 and wraps the model in ``torch.nn.DataParallel``;
    3. builds the training dataset/loader (COCO or CSV) and, when one is
       configured, a validation dataset;
    4. builds a RetinaNet of depth ``args.depth``, passes it through
       ``load_checkpoint`` with ``args.weights`` and moves it to the GPU when
       CUDA is available;
    5. trains for ``args.epochs`` epochs, setting the learning rate on every
       iteration from a linear warm-up followed by a cosine decay;
    6. evaluates after each epoch. For COCO, rank 0 saves the state dict to
       ``<logdir>/retinanet_resnet<depth>_best.pt`` whenever the second
       statistic returned by ``coco_eval.evaluate_coco`` (``map_50``) improves.

    Module-level names written: ``args``, ``logger``, ``results``,
    ``val_image_ids``, ``best_map`` and ``keep_pbar``.

    Raises:
        ValueError: unknown ``args.dataset``, missing CSV arguments, or an
            unsupported ``args.depth``.
        NotImplementedError: distributed run on CUDA with a ``dist_mode``
            other than ``"DDP"`` or ``"DP"``.
    """
    global args, results, val_image_ids, logger, best_map, keep_pbar

    args = parse().parse_args()

    os.makedirs(args.logdir, exist_ok=True)

    log_file = os.path.join(args.logdir, "train.log")
    logger = get_logger(__name__, log_file)

    # init_distributed_mode raising KeyError is treated as "not launched by a
    # distributed launcher" -> plain single-process training.
    try:
        init_distributed_mode(args)
        distributed = True
    except KeyError:
        args.rank = 0
        distributed = False

    if args.dist_mode == "DP":
        distributed = True
        args.rank = 0

    # Neither operand changes after this point, so evaluate the flag once.
    use_ddp = dist.is_available() and distributed and args.dist_mode == "DDP"
    is_main = args.rank == 0

    if is_main:
        logger.info(f"distributed mode: {args.dist_mode if distributed else 'OFF'}")

    if args.val_image_dir is None:
        if is_main:
            logger.info(
                "No validation image directory specified, will assume the same image directory for train and val"
            )
        args.val_image_dir = args.image_dir

    writer = SummaryWriter(logdir=args.logdir)
    img_dim = parse_resize(args.resize)
    if is_main:
        logger.info(f"training image dimensions: {img_dim[0]},{img_dim[1]}")
        logger.info("CUDA available: {}".format(torch.cuda.is_available()))
        logger.info(f"torch.__version__ = {torch.__version__}")

    # ------------------------------------------------------------------ data
    # Bound up front so later `is not None` checks never hit an unbound name
    # (COCO training without --val_json_path used to raise NameError).
    dataset_val = None

    if args.dataset == "coco":
        train_transforms = [Normalizer()]

        if args.augs is not None:
            p = 0.5 if args.augs_prob is None else args.augs_prob
            aug_map = get_aug_map(p=p)
            for aug in args.augs:
                if aug in aug_map:
                    train_transforms.append(aug_map[aug])
                else:
                    logger.info(f"{aug} is not available.")
        train_transforms.append(Resizer(img_dim))

        if is_main:
            # Exactly two entries means only Normalizer + Resizer were added.
            if len(train_transforms) == 2:
                logger.info(
                    "Not applying any special augmentations, using only {}".format(train_transforms)
                )
            else:
                logger.info(
                    "Applying augmentations {} with probability {}".format(train_transforms, p)
                )
        dataset_train = CocoDataset(
            args.image_dir, args.train_json_path, transform=transforms.Compose(train_transforms),
        )

    elif args.dataset == "csv":

        if args.csv_train is None:
            raise ValueError("Must provide --csv_train when training on CSV,")

        if args.csv_classes is None:
            raise ValueError("Must provide --csv_classes when training on CSV,")

        dataset_train = CSVDataset(
            train_file=args.csv_train,
            class_list=args.csv_classes,
            transform=transforms.Compose([Normalizer(), Augmenter(), Resizer(img_dim)]),
        )

        if args.csv_val is None:
            print("No validation annotations provided.")
        else:
            dataset_val = CSVDataset(
                train_file=args.csv_val,
                class_list=args.csv_classes,
                transform=transforms.Compose([Normalizer(), Resizer(img_dim)]),
            )

    else:
        raise ValueError("Dataset type not understood (must be csv or coco), exiting.")

    if use_ddp:
        sampler = DistributedSampler(dataset_train)
        dataloader_train = DataLoader(
            dataset_train,
            sampler=sampler,
            batch_size=args.batch_size,
            num_workers=args.num_workers,
            collate_fn=collater,
        )

    elif args.nsr is not None:
        logger.info(f"using WeightedRandomSampler with negative (image) sample rate = {args.nsr}")
        weighted_sampler = WeightedRandomSampler(
            dataset_train.weights, len(dataset_train), replacement=True
        )
        dataloader_train = DataLoader(
            dataset_train,
            num_workers=args.num_workers,
            collate_fn=collater,
            sampler=weighted_sampler,
            batch_size=args.batch_size,
            pin_memory=True,
        )

    else:
        sampler = AspectRatioBasedSampler(
            dataset_train, batch_size=args.batch_size, drop_last=False
        )
        dataloader_train = DataLoader(
            dataset_train,
            num_workers=args.num_workers,
            collate_fn=collater,
            batch_sampler=sampler,
            pin_memory=True,
        )

    # A COCO-style validation json replaces any validation set chosen above.
    if args.val_json_path is not None:
        dataset_val = CocoDataset(
            args.val_image_dir,
            args.val_json_path,
            transform=transforms.Compose([Normalizer(), Resizer(img_dim)]),
            return_ids=True,
        )

    # ----------------------------------------------------------------- model
    backbones = {
        18: model.resnet18,
        34: model.resnet34,
        50: model.resnet50,
        101: model.resnet101,
        152: model.resnet152,
    }
    if args.depth not in backbones:
        raise ValueError("Unsupported model depth, must be one of 18, 34, 50, 101, 152")
    retinanet = backbones[args.depth](num_classes=dataset_train.num_classes(), pretrained=True)

    # Load checkpoint if provided.
    retinanet = load_checkpoint(retinanet, args.weights, args.depth)

    if torch.cuda.is_available():
        if dist.is_available() and distributed:
            if args.dist_mode == "DDP":
                retinanet = nn.SyncBatchNorm.convert_sync_batchnorm(retinanet)
                retinanet = retinanet.cuda()
            elif args.dist_mode == "DP":
                retinanet = torch.nn.DataParallel(retinanet).cuda()
            else:
                raise NotImplementedError
        else:
            torch.cuda.set_device(torch.device("cuda:0"))
            retinanet = retinanet.cuda()

    retinanet.training = True

    optimizer = optim.Adam(retinanet.parameters(), lr=0.001)
    if use_ddp:
        optimizer = LARC(optimizer=optimizer, trust_coefficient=0.001, clip=True)

    # ----------------------------------------------------------- lr schedule
    # One learning-rate value per training iteration: linear warm-up from
    # start_warmup to base_lr, then cosine decay from base_lr to final_lr.
    steps_per_epoch = len(dataloader_train)
    warmup_lr_schedule = np.linspace(
        args.start_warmup, args.base_lr, steps_per_epoch * args.warmup_epochs
    )
    cosine_steps = steps_per_epoch * (args.epochs - args.warmup_epochs)
    cosine_lr_schedule = np.array(
        [
            args.final_lr
            + 0.5 * (args.base_lr - args.final_lr) * (1 + math.cos(math.pi * t / cosine_steps))
            for t in np.arange(cosine_steps)
        ]
    )
    lr_schedule = np.concatenate((warmup_lr_schedule, cosine_lr_schedule))

    if use_ddp:
        retinanet = nn.parallel.DistributedDataParallel(
            retinanet, device_ids=[args.gpu_to_work_on], find_unused_parameters=True
        )

    loss_hist = collections.deque(maxlen=500)

    # train()/freeze_bn() are called on the underlying network. Checking the
    # actual wrapper type (instead of the `distributed` flag) also covers
    # --dist_mode DP on a machine without CUDA, where the model is never
    # wrapped and `.module` does not exist.
    if isinstance(retinanet, (nn.DataParallel, nn.parallel.DistributedDataParallel)):
        core_model = retinanet.module
    else:
        core_model = retinanet
    core_model.train()
    core_model.freeze_bn()

    if is_main:
        logger.info("Number of training images: {}".format(len(dataset_train)))
        if dataset_val is not None:
            logger.info("Number of validation images: {}".format(len(dataset_val)))

    best_map = 0
    n_iter = 0

    scaler = amp.GradScaler(enabled=True)
    keep_pbar = not (distributed and args.dist_mode == "DDP")

    # -------------------------------------------------------------- training
    for epoch_num in range(args.epochs):

        if use_ddp:
            dataloader_train.sampler.set_epoch(epoch_num)
        core_model.train()
        core_model.freeze_bn()

        epoch_loss = []
        # Module-level accumulators; presumably filled by validate() below —
        # NOTE(review): confirm against validate()'s implementation.
        results = []
        val_image_ids = []

        pbar = tqdm(enumerate(dataloader_train), total=steps_per_epoch, leave=keep_pbar)
        for iter_num, data in pbar:
            n_iter = epoch_num * steps_per_epoch + iter_num

            lr = lr_schedule[n_iter]
            for param_group in optimizer.param_groups:
                param_group["lr"] = lr

            optimizer.zero_grad()

            if torch.cuda.is_available():
                # autocast is explicitly disabled: the forward pass is not
                # run under mixed precision.
                with amp.autocast(enabled=False):
                    classification_loss, regression_loss = retinanet(
                        [data["img"].cuda().float(), data["annot"].cuda()]
                    )
            else:
                classification_loss, regression_loss = retinanet(
                    [data["img"].float(), data["annot"]]
                )

            classification_loss = classification_loss.mean()
            regression_loss = regression_loss.mean()
            loss = classification_loss + regression_loss

            if is_main:
                writer.add_scalar("Learning rate", lr, n_iter)
            pbar_desc = f"Epoch: {epoch_num} | lr = {lr:0.6f} | batch: {iter_num} | cls: {classification_loss:.4f} | reg: {regression_loss:.4f}"
            pbar.set_description(pbar_desc)
            # No manual pbar.update(): iterating the tqdm object already
            # advances the bar, an extra update(1) would count each batch twice.

            if bool(loss == 0):
                continue

            scaler.scale(loss).backward()

            # unscale the gradients for grad clipping
            scaler.unscale_(optimizer)
            torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)

            scaler.step(optimizer)
            scaler.update()

            loss_value = float(loss)
            loss_hist.append(loss_value)
            epoch_loss.append(loss_value)

            del classification_loss
            del regression_loss

        # ------------------------------------------------------------ evaluation
        if args.dataset == "coco":

            # Validation only runs when there is something to validate on;
            # previously validate() was reached with an unbound loader when
            # the validation set was missing or empty.
            if dataset_val is not None and len(dataset_val) > 0:
                if use_ddp:
                    sampler_val = DistributedSampler(dataset_val)
                    dataloader_val = DataLoader(
                        dataset_val,
                        sampler=sampler_val,
                        batch_size=args.batch_size,
                        num_workers=args.num_workers,
                        collate_fn=eval_collate,
                        pin_memory=True,
                    )
                else:
                    dataloader_val = DataLoader(
                        dataset_val,
                        batch_size=args.batch_size,
                        num_workers=args.num_workers,
                        collate_fn=eval_collate,
                        pin_memory=True,
                        drop_last=False,
                    )

                validate(retinanet, dataset_val, dataloader_val)

            if is_main:
                if len(results):
                    with open(os.path.join(args.logdir, "val_bbox_results.json"), "w") as f:
                        json.dump(results, f, indent=4)
                    stats = coco_eval.evaluate_coco(dataset_val, val_image_ids, args.logdir)
                    map_avg, map_50, map_75, map_small = stats[:4]
                else:
                    # No detections collected: report sentinel values.
                    map_avg, map_50, map_75, map_small = [-1] * 4

                if map_50 > best_map:
                    torch.save(
                        retinanet.state_dict(),
                        os.path.join(args.logdir, f"retinanet_resnet{args.depth}_best.pt"),
                    )
                    best_map = map_50

                # NOTE(review): the metric names below were restored from text
                # mangled by an e-mail obfuscation filter ("[email protected]"),
                # which made the @0.5 and @0.75 scalars share one tag. The
                # exact original spelling should be confirmed.
                global_step = epoch_num * steps_per_epoch
                writer.add_scalar("eval/map@0.5:0.95", map_avg, global_step)
                writer.add_scalar("eval/map@0.5", map_50, global_step)
                writer.add_scalar("eval/map@0.75", map_75, global_step)
                writer.add_scalar("eval/map_small", map_small, global_step)
                logger.info(
                    f"Epoch: {epoch_num} | lr = {lr:.6f} | map@0.5:0.95 = {map_avg:.4f} | map@0.5 = {map_50:.4f} | map@0.75 = {map_75:.4f} | map-small = {map_small:.4f}"
                )

        elif args.dataset == "csv" and args.csv_val is not None:

            csv_eval.evaluate(dataset_val, retinanet)

    retinanet.eval()
Exemplo n.º 17
0
def main(args=None):
    """Train a RetinaNet network on a COCO- or CSV-style dataset.

    Args:
        args: Optional list of command-line tokens forwarded to
            ``argparse``; ``None`` makes argparse read ``sys.argv``.

    Behaviour:
        * builds the training (and, when available, validation) dataset
          selected by ``--dataset``;
        * loads a pickled model from ``--model`` when given, otherwise builds
          a pretrained backbone of ``--depth``;
        * trains for ``--epochs`` epochs, evaluating and writing
          ``<result_dir>/<dataset>_retinanet_<epoch>.pt`` after each one;
        * finally writes ``model_final.pt`` to the current directory.

    Raises:
        ValueError: unknown dataset type, missing dataset arguments, or an
            unsupported ``--depth``.
    """
    import os  # local import: only needed for the results directory handling

    parser = argparse.ArgumentParser(
        description="Simple training script for training a RetinaNet network."
    )

    parser.add_argument("--dataset", help="Dataset type, must be one of csv or coco.")
    parser.add_argument("--model", default=None, help="Path to trained model")
    parser.add_argument("--coco_path", help="Path to COCO directory")
    parser.add_argument(
        "--csv_train", help="Path to file containing training annotations (see readme)"
    )
    parser.add_argument(
        "--csv_classes", help="Path to file containing class list (see readme)"
    )
    parser.add_argument(
        "--csv_val",
        help="Path to file containing validation annotations (optional, see readme)",
    )

    parser.add_argument(
        "--depth",
        help="Resnet depth, must be one of 18, 34, 50, 101, 152",
        type=int,
        default=50,
    )
    parser.add_argument("--epochs", help="Number of epochs", type=int, default=100)
    parser.add_argument(
        "--result_dir",
        default="results",
        help="Path to store training results",
        type=str,
    )
    parser.add_argument(
        "--batch_num", default=8, help="Number of samples in a batch", type=int
    )

    parser = parser.parse_args(args)

    print(parser)

    batch_size = parser.batch_num

    # Create the data loaders
    if parser.dataset == "coco":

        if parser.coco_path is None:
            raise ValueError("Must provide --coco_path when training on COCO,")
        # TODO: parameterize arguments for Resizer, and other transform functions
        dataset_train = CocoDataset(
            parser.coco_path,
            set_name="train_images_full",
            transform=transforms.Compose(
                [Normalizer(), Augmenter(), Resizer(passthrough=True)]
            ),
        )
        dataset_val = CocoDataset(
            parser.coco_path,
            set_name="val_images_full",
            transform=transforms.Compose([Normalizer(), Resizer(passthrough=True)]),
        )

    elif parser.dataset == "csv":

        if parser.csv_train is None:
            raise ValueError("Must provide --csv_train when training on CSV,")

        if parser.csv_classes is None:
            raise ValueError("Must provide --csv_classes when training on CSV,")

        dataset_train = CSVDataset(
            train_file=parser.csv_train,
            class_list=parser.csv_classes,
            transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()]),
        )

        if parser.csv_val is None:
            dataset_val = None
            print("No validation annotations provided.")
        else:
            dataset_val = CSVDataset(
                train_file=parser.csv_val,
                class_list=parser.csv_classes,
                transform=transforms.Compose([Normalizer(), Resizer()]),
            )

    else:
        raise ValueError("Dataset type not understood (must be csv or coco), exiting.")

    sampler = AspectRatioBasedSampler(
        dataset_train, batch_size=batch_size, drop_last=False
    )
    dataloader_train = DataLoader(
        dataset_train, num_workers=16, collate_fn=collater, batch_sampler=sampler
    )
    # No validation DataLoader is built: the evaluation helpers below take
    # the dataset object directly.

    # Create the model
    backbones = {
        18: model.resnet18,
        34: model.resnet34,
        50: model.resnet50,
        101: model.resnet101,
        152: model.resnet152,
    }
    if parser.depth not in backbones:
        raise ValueError("Unsupported model depth, must be one of 18, 34, 50, 101, 152")

    if parser.model:
        # NOTE: torch.load unpickles arbitrary objects; only point --model at
        # files from a trusted source.
        retinanet = torch.load(parser.model)
    else:
        # Only build (and fetch weights for) a pretrained backbone when it is
        # actually going to be used.
        retinanet = backbones[parser.depth](
            num_classes=dataset_train.num_classes(), pretrained=True
        )

    if torch.cuda.is_available():
        retinanet = torch.nn.DataParallel(retinanet.cuda()).cuda()
    else:
        retinanet = torch.nn.DataParallel(retinanet)

    retinanet.training = True

    optimizer = optim.Adam(retinanet.parameters(), lr=1e-4)

    scheduler = optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, patience=3, verbose=True
    )

    # Sliding window over the most recent losses, used for the running loss.
    loss_hist = collections.deque(maxlen=500)

    retinanet.train()
    retinanet.module.freeze_bn()

    print("Num training images: {}".format(len(dataset_train)))

    # Per-epoch checkpoints are written here; make sure the directory exists
    # so the first torch.save does not fail after a full epoch of training.
    os.makedirs(parser.result_dir, exist_ok=True)

    for epoch_num in range(parser.epochs):

        retinanet.train()
        retinanet.module.freeze_bn()

        epoch_loss = []
        p_bar = tqdm(dataloader_train)
        for iter_num, data in enumerate(p_bar):
            # Best effort per batch: a failing batch is reported and skipped
            # so that one bad sample does not abort the whole run.
            try:
                optimizer.zero_grad()

                if torch.cuda.is_available():
                    classification_loss, regression_loss = retinanet(
                        [data["img"].cuda().float(), data["annot"]]
                    )
                else:
                    classification_loss, regression_loss = retinanet(
                        [data["img"].float(), data["annot"]]
                    )

                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()

                loss = classification_loss + regression_loss

                # Nothing to learn from this batch.
                if bool(loss == 0):
                    continue

                loss.backward()

                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)

                optimizer.step()

                loss_value = float(loss)
                loss_hist.append(loss_value)
                epoch_loss.append(loss_value)

                mean_loss = np.mean(loss_hist)
                p_bar.set_description(
                    f"Epoch: {epoch_num} | Iteration: {iter_num} | "
                    f"Class loss: {float(classification_loss.item()):.5f} | "
                    f"Regr loss: {float(regression_loss.item()):.5f} | "
                    f"Running loss: {mean_loss:.5f}"
                )

                del classification_loss
                del regression_loss
            except Exception as e:
                print(e)
                continue

        if parser.dataset == "coco":

            print("Evaluating dataset")

            coco_eval.evaluate_coco(
                dataset_val, retinanet, result_dir=parser.result_dir
            )

        elif parser.dataset == "csv" and parser.csv_val is not None:

            print("Evaluating dataset")

            csv_eval.evaluate(dataset_val, retinanet)

        # np.mean([]) is NaN; skip the plateau scheduler when every batch of
        # the epoch was skipped instead of feeding it a meaningless metric.
        if epoch_loss:
            scheduler.step(np.mean(epoch_loss))

        torch.save(
            retinanet.module,
            os.path.join(
                parser.result_dir, f"{parser.dataset}_retinanet_{epoch_num}.pt"
            ),
        )

    retinanet.eval()

    torch.save(retinanet, "model_final.pt")
Exemplo n.º 18
0
def main(args=None):
    """Train a RetinaNet network and track train/validation loss per epoch.

    Args:
        args: Optional list of command-line tokens forwarded to
            ``argparse``; ``None`` makes argparse read ``sys.argv``.

    Each epoch runs one pass over the training loader, one gradient-free pass
    over the validation loader (when a validation set exists), the dataset
    specific evaluation, two learning-rate schedulers, and then saves
    ``<iou>_retinanet_<epoch>.pt``. ``model_final.pt`` is written at the end.

    Raises:
        ValueError: unknown dataset type, missing dataset arguments, or an
            unsupported ``--depth``.
    """
    parser = argparse.ArgumentParser(description='Simple training script for training a RetinaNet network.')

    parser.add_argument('--dataset', help='Dataset type, must be one of csv or coco.')
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument('--csv_train', help='Path to file containing training annotations (see readme)')
    parser.add_argument('--csv_classes', help='Path to file containing class list (see readme)')
    parser.add_argument('--csv_val', help='Path to file containing validation annotations (optional, see readme)')
    # Given in tenths as a string: the value is divided by 10 before use, so
    # the default '05' becomes an IoU threshold of 0.5.
    parser.add_argument('--iou', default='05')
    parser.add_argument('--depth', help='Resnet depth, must be one of 18, 34, 50, 101, 152', type=int, default=50)
    parser.add_argument('--epochs', help='Number of epochs', type=int, default=100)

    parser = parser.parse_args(args)

    # Create the data loaders
    if parser.dataset == 'coco':

        if parser.coco_path is None:
            raise ValueError('Must provide --coco_path when training on COCO,')

        dataset_train = CocoDataset(parser.coco_path, set_name='train2017',
                                    transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()]))
        dataset_val = CocoDataset(parser.coco_path, set_name='val2017',
                                  transform=transforms.Compose([Normalizer(), Resizer()]))

    elif parser.dataset == 'csv':

        if parser.csv_train is None:
            raise ValueError('Must provide --csv_train when training on CSV,')

        if parser.csv_classes is None:
            raise ValueError('Must provide --csv_classes when training on CSV,')

        dataset_train = CSVDataset(train_file=parser.csv_train, class_list=parser.csv_classes,
                                   transform=transforms.Compose([Normalizer(), Resizer()]))

        if parser.csv_val is None:
            dataset_val = None
            print('No validation annotations provided.')
        else:
            dataset_val = CSVDataset(train_file=parser.csv_val, class_list=parser.csv_classes,
                                     transform=transforms.Compose([Normalizer(), Resizer()]))

    else:
        raise ValueError('Dataset type not understood (must be csv or coco), exiting.')

    sampler = AspectRatioBasedSampler(dataset_train, batch_size=8, drop_last=False)
    dataloader_train = DataLoader(dataset_train, num_workers=3, collate_fn=collater, batch_sampler=sampler)

    # Stays None without a validation set so the validation pass below can be
    # skipped instead of failing on an unbound name.
    dataloader_val = None
    if dataset_val is not None:
        sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=8, drop_last=False)
        dataloader_val = DataLoader(dataset_val, num_workers=3, collate_fn=collater, batch_sampler=sampler_val)

    # Create the model
    backbones = {
        18: model.resnet18,
        34: model.resnet34,
        50: model.resnet50,
        101: model.resnet101,
        152: model.resnet152,
    }
    if parser.depth not in backbones:
        raise ValueError('Unsupported model depth, must be one of 18, 34, 50, 101, 152')
    retinanet = backbones[parser.depth](num_classes=dataset_train.num_classes(), pretrained=True)

    if torch.cuda.is_available():
        retinanet = torch.nn.DataParallel(retinanet.cuda()).cuda()
    else:
        retinanet = torch.nn.DataParallel(retinanet)

    retinanet.training = True

    optimizer = optim.Adam(retinanet.parameters(), lr=5e-5)
    # Both schedulers act on the same optimizer and are stepped every epoch.
    lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, verbose=True)
    multistep_scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[5, 8, 11, 20], gamma=0.2)

    # Sliding windows over recent losses, used for the running-loss printouts.
    loss_hist = collections.deque(maxlen=500)
    val_loss_hist = collections.deque(maxlen=500)

    retinanet.train()
    retinanet.module.freeze_bn()

    print('Num training images: {}'.format(len(dataset_train)))

    for epoch_num in range(parser.epochs):

        retinanet.train()
        retinanet.module.freeze_bn()

        epoch_loss = []
        val_epoch_loss = []

        # ---- training pass -------------------------------------------------
        for iter_num, data in enumerate(dataloader_train):
            # Best effort per batch: report the failure and move on.
            try:
                optimizer.zero_grad()

                if torch.cuda.is_available():
                    classification_loss, regression_loss = retinanet([data['img'].cuda().float(), data['annot']])
                else:
                    classification_loss, regression_loss = retinanet([data['img'].float(), data['annot']])

                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()

                loss = classification_loss + regression_loss

                if bool(loss == 0):
                    continue

                loss.backward()

                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)

                optimizer.step()

                loss_hist.append(float(loss))

                epoch_loss.append(float(loss))

                print(
                    'Train: Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f} | Epoch loss: {:1.5f} '.format(
                        epoch_num, iter_num, float(classification_loss), float(regression_loss), np.mean(loss_hist), epoch_loss[-1]))

                del classification_loss
                del regression_loss
            except Exception as e:
                print(e)
                continue

        # ---- validation-loss pass ------------------------------------------
        # The model is left in train mode here (no eval() call) and only the
        # gradients are disabled; the forward call is unpacked into the same
        # two loss values as in the training pass.
        if dataloader_val is not None:
            for iter_num, data in enumerate(dataloader_val):
                try:
                    with torch.no_grad():
                        if torch.cuda.is_available():
                            classification_loss, regression_loss = retinanet((data['img'].cuda().float(), data['annot']))
                        else:
                            classification_loss, regression_loss = retinanet((data['img'].float(), data['annot']))

                        classification_loss = classification_loss.mean()
                        regression_loss = regression_loss.mean()

                        loss = classification_loss + regression_loss

                        if bool(loss == 0):
                            continue

                        val_loss_hist.append(float(loss))

                        val_epoch_loss.append(float(loss))

                    print(
                        'Val: Epoch: {} |  Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f} | Epoch loss: {:1.5f} '.format(
                            epoch_num, float(classification_loss), float(regression_loss), np.mean(val_loss_hist), val_epoch_loss[-1]))

                    del classification_loss
                    del regression_loss
                except Exception as e:
                    print(e)
                    continue

        if parser.dataset == 'coco':

            print('Evaluating dataset')

            coco_eval.evaluate_coco(dataset_val, retinanet)

        elif parser.dataset == 'csv' and parser.csv_val is not None:

            print('Evaluating dataset')
            mAP_val = csv_eval.evaluate(dataset_val, retinanet, iou_threshold=float(parser.iou) / 10)
            # NOTE(review): `writer` is not defined inside this function; it
            # is presumably a module-level summary writer — confirm it exists.
            writer.add_scalar('val_mAP_Questions', mAP_val[0][0], epoch_num)
            writer.add_scalar('val_loss', np.mean(val_epoch_loss), epoch_num)
            writer.add_scalar('train_loss', np.mean(epoch_loss), epoch_num)

        # np.mean([]) is NaN; skip the plateau scheduler when every training
        # batch of the epoch was skipped.
        if epoch_loss:
            lr_scheduler.step(np.mean(epoch_loss))
        multistep_scheduler.step()
        torch.save(retinanet.module, '{}_retinanet_{}.pt'.format(parser.iou, epoch_num))

    retinanet.eval()

    torch.save(retinanet, 'model_final.pt')
Exemplo n.º 19
0
def main(args=None):
    """Train a RetinaNet detector and log the run to a Neptune experiment.

    Parses the command-line options, builds the COCO or CSV datasets, picks a
    backbone from ``--depth`` / ``--arch`` and trains for ``--epochs`` epochs.
    After each epoch the model is evaluated (when validation data exists), the
    plateau scheduler is stepped on the mean training loss and a snapshot of
    the unwrapped model is written to ``<output_folder>/<exp_name>``.

    Args:
        args: Optional list of argument strings. ``None`` lets argparse read
            ``sys.argv``.

    Raises:
        ValueError: If the dataset type is not ``csv``/``coco``, a required
            path option is missing, or no model matches ``--depth``/``--arch``.
    """
    parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')

    parser.add_argument('--dataset',
                        help='Dataset type, must be one of csv or coco.')
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument(
        '--csv_train',
        help='Path to file containing training annotations (see readme)')
    parser.add_argument('--csv_classes',
                        help='Path to file containing class list (see readme)')
    parser.add_argument(
        '--csv_val',
        help=
        'Path to file containing validation annotations (optional, see readme)'
    )
    parser.add_argument('--exp_name',
                        help='Path to folder for saving the model and log',
                        type=str)
    parser.add_argument('--output_folder',
                        help='Path to folder for saving all the experiments',
                        type=str)

    parser.add_argument(
        '--depth',
        help='Resnet depth, must be one of 18, 34, 50, 101, 152',
        type=int,
        default=50)
    parser.add_argument('--epochs',
                        help='Number of epochs',
                        type=int,
                        default=100)  # 100
    parser.add_argument('--batch_size', help='Batch size', type=int, default=2)
    # The help text used to be a copy of the --epochs one.
    parser.add_argument('--lr',
                        help='Learning rate',
                        type=float,
                        default=1e-5)
    parser.add_argument('--caption',
                        help='Any thing in particular about the experiment',
                        type=str)
    parser.add_argument('--server',
                        help='seerver name',
                        type=str,
                        default='ultron')
    parser.add_argument('--detector',
                        help='detection algo',
                        type=str,
                        default='RetinaNet')
    parser.add_argument('--arch', help='model architecture', type=str)
    parser.add_argument('--pretrain', default=False, action='store_true')
    parser.add_argument('--freeze_batchnorm',
                        default=False,
                        action='store_true')

    # From here on ``parser`` is the parsed namespace, not the ArgumentParser.
    parser = parser.parse_args(args)

    output_folder_path = os.path.join(parser.output_folder, parser.exp_name)
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(output_folder_path, exist_ok=True)

    # NOTE(review): the key 'freeze_batchorm' is misspelled; it is kept
    # byte-identical because it is what gets recorded with the experiment.
    PARAMS = {
        'dataset': parser.dataset,
        'exp_name': parser.exp_name,
        'depth': parser.depth,
        'epochs': parser.epochs,
        'batch_size': parser.batch_size,
        'lr': parser.lr,
        'caption': parser.caption,
        'server': parser.server,
        'arch': parser.arch,
        'pretrain': parser.pretrain,
        'freeze_batchorm': parser.freeze_batchnorm
    }

    exp = neptune.create_experiment(
        name=parser.exp_name,
        params=PARAMS,
        tags=[parser.arch, parser.detector, parser.dataset, parser.server])

    # Create the data loaders
    if parser.dataset == 'coco':

        if parser.coco_path is None:
            raise ValueError('Must provide --coco_path when training on COCO,')

        dataset_train = CocoDataset(parser.coco_path,
                                    set_name='train2017',
                                    transform=transforms.Compose(
                                        [Normalizer(),
                                         Augmenter(),
                                         Resizer()]))
        dataset_val = CocoDataset(parser.coco_path,
                                  set_name='val2017',
                                  transform=transforms.Compose(
                                      [Normalizer(), Resizer()]))

    elif parser.dataset == 'csv':

        # These messages used to say "COCO" although this is the CSV branch.
        if parser.csv_train is None:
            raise ValueError('Must provide --csv_train when training on CSV,')

        if parser.csv_classes is None:
            raise ValueError(
                'Must provide --csv_classes when training on CSV,')

        dataset_train = CSVDataset(train_file=parser.csv_train,
                                   class_list=parser.csv_classes,
                                   transform=transforms.Compose(
                                       [Normalizer(),
                                        Augmenter(),
                                        Resizer()]))

        if parser.csv_val is None:
            dataset_val = None
            print('No validation annotations provided.')
        else:
            dataset_val = CSVDataset(train_file=parser.csv_val,
                                     class_list=parser.csv_classes,
                                     transform=transforms.Compose(
                                         [Normalizer(),
                                          Resizer()]))

    else:
        raise ValueError(
            'Dataset type not understood (must be csv or coco), exiting.')

    sampler = AspectRatioBasedSampler(dataset_train,
                                      batch_size=parser.batch_size,
                                      drop_last=False)
    dataloader_train = DataLoader(dataset_train,
                                  num_workers=3,
                                  collate_fn=collater,
                                  batch_sampler=sampler)

    # The validation loader is built but not consumed below; evaluation is
    # handed ``dataset_val`` directly.
    if dataset_val is not None:
        sampler_val = AspectRatioBasedSampler(dataset_val,
                                              batch_size=1,
                                              drop_last=False)
        dataloader_val = DataLoader(dataset_val,
                                    num_workers=3,
                                    collate_fn=collater,
                                    batch_sampler=sampler_val)

    # Create the model
    if parser.depth == 18 and parser.arch == 'Resnet':
        retinanet = model.resnet18(num_classes=dataset_train.num_classes(),
                                   pretrained=parser.pretrain)
    elif parser.depth == 10 and parser.arch == 'Resnet':
        retinanet = model.resnet10(num_classes=dataset_train.num_classes(),
                                   pretrained=parser.pretrain)
    elif parser.depth == 18 and parser.arch == 'BiRealNet18':
        checkpoint_path = None
        if parser.pretrain:
            checkpoint_path = '/media/Rozhok/Bi-Real-net/pytorch_implementation/BiReal18_34/models/imagenet_baseline/checkpoint.pth.tar'
        retinanet = birealnet18(checkpoint_path,
                                num_classes=dataset_train.num_classes())
    elif parser.depth == 34 and parser.arch == 'Resnet':
        retinanet = model.resnet34(num_classes=dataset_train.num_classes(),
                                   pretrained=parser.pretrain)
    elif parser.depth == 50 and parser.arch == 'Resnet':
        retinanet = model.resnet50(num_classes=dataset_train.num_classes(),
                                   pretrained=parser.pretrain)
    elif parser.depth == 101 and parser.arch == 'Resnet':
        retinanet = model.resnet101(num_classes=dataset_train.num_classes(),
                                    pretrained=parser.pretrain)
    elif parser.depth == 152:
        # NOTE(review): unlike the other depths this branch does not check
        # --arch; left unchanged so existing invocations keep working.
        retinanet = model.resnet152(num_classes=dataset_train.num_classes(),
                                    pretrained=parser.pretrain)
    elif parser.arch == 'ofa':
        print("Model is ResNet50D.")
        bn_momentum = 0.1
        bn_eps = 1e-5
        retinanet = ResNet50D(
            n_classes=dataset_train.num_classes(),
            bn_param=(bn_momentum, bn_eps),
            dropout_rate=0,
            width_mult=1.0,
            depth_param=3,
            expand_ratio=0.35,
        )

    else:
        raise ValueError(
            'Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    print(retinanet)

    use_gpu = True

    if use_gpu:
        if torch.cuda.is_available():
            retinanet = retinanet.cuda()

    # Always wrap in DataParallel so ``retinanet.module`` is valid below.
    if torch.cuda.is_available():
        retinanet = torch.nn.DataParallel(retinanet).cuda()
    else:
        retinanet = torch.nn.DataParallel(retinanet)

    retinanet.training = True

    optimizer = optim.Adam(retinanet.parameters(), lr=parser.lr)

    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     patience=3,
                                                     verbose=True)

    # Window of the most recent losses, used only for the "Running loss" print.
    loss_hist = collections.deque(maxlen=500)

    retinanet.train()
    if parser.freeze_batchnorm:
        retinanet.module.freeze_bn()

    print('Num training images: {}'.format(len(dataset_train)))

    for epoch_num in range(parser.epochs):

        exp.log_metric('Current lr', float(optimizer.param_groups[0]['lr']))
        exp.log_metric('Current epoch', int(epoch_num))

        # Evaluation at the end of the previous epoch may have changed the
        # mode, so training mode is re-applied every epoch.
        retinanet.train()
        if parser.freeze_batchnorm:
            retinanet.module.freeze_bn()

        epoch_loss = []

        for iter_num, data in enumerate(dataloader_train):

            # Best effort: a failing batch is reported and skipped rather
            # than aborting the whole run.
            try:
                optimizer.zero_grad()

                if torch.cuda.is_available():
                    classification_loss, regression_loss = retinanet(
                        [data['img'].cuda().float(), data['annot']])
                else:
                    classification_loss, regression_loss = retinanet(
                        [data['img'].float(), data['annot']])

                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()

                loss = classification_loss + regression_loss

                # Nothing to learn from a batch whose loss is exactly zero.
                if bool(loss == 0):
                    continue

                loss.backward()

                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)

                optimizer.step()

                loss_hist.append(float(loss))

                epoch_loss.append(float(loss))

                print(
                    'Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'
                    .format(epoch_num, iter_num, float(classification_loss),
                            float(regression_loss), np.mean(loss_hist)))

                exp.log_metric('Training: Classification loss',
                               float(classification_loss))
                exp.log_metric('Training: Regression loss',
                               float(regression_loss))
                exp.log_metric('Training: Totalloss', float(loss))

                del classification_loss
                del regression_loss
            except Exception as e:
                print(e)
                continue

        if parser.dataset == 'coco':

            print('Evaluating dataset')

            coco_eval.evaluate_coco(dataset_val,
                                    retinanet,
                                    output_folder_path,
                                    exp=exp)

        elif parser.dataset == 'csv' and parser.csv_val is not None:

            print('Evaluating dataset')

            csv_eval.evaluate(dataset_val, retinanet)

        # If every batch was skipped the mean of an empty list is NaN, which
        # the plateau scheduler would count as a bad epoch; skip the step.
        if epoch_loss:
            scheduler.step(np.mean(epoch_loss))

        torch.save(
            retinanet.module,
            os.path.join(
                output_folder_path,
                '{}_retinanet_{}.pt'.format(parser.dataset, epoch_num)))

    retinanet.eval()

    torch.save(retinanet, os.path.join(output_folder_path, 'model_final.pt'))
Exemplo n.º 20
0
def main(args=None):
    """Train a RetinaNet detector with a tqdm progress bar and scalar logging.

    Parses the command-line options, builds the COCO or CSV datasets, creates
    a ResNet-backed RetinaNet of the requested depth and trains it for
    ``--epochs`` epochs. Per-epoch loss totals are written through ``tb``,
    the model is evaluated when validation data exists, and a snapshot is
    saved into ``--weights_folder`` after every epoch.

    Args:
        args: Optional list of argument strings. ``None`` lets argparse read
            ``sys.argv``.

    Raises:
        ValueError: If the dataset type is not ``csv``/``coco``, a required
            path option is missing, or ``--depth`` is unsupported.
    """
    parser = argparse.ArgumentParser(description='Simple training script for training a RetinaNet network.')

    parser.add_argument('--dataset', default='csv', help='Dataset type, must be one of csv or coco.')
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument('--csv_train', default='dataset/pascal_train.csv', help='Path to file containing training annotations (see readme)')
    parser.add_argument('--csv_classes', default='dataset/classes.csv', help='Path to file containing class list (see readme)')
    parser.add_argument('--csv_val', default='dataset/pascal_val.csv', help='Path to file containing validation annotations (optional, see readme)')

    parser.add_argument('--depth', help='Resnet depth, must be one of 18, 34, 50, 101, 152', type=int, default=50)
    parser.add_argument('--epochs', help='Number of epochs', type=int, default=100)
    parser.add_argument('--weights_folder', help='path to save weight', type=str, required=True)

    # From here on ``parser`` is the parsed namespace, not the ArgumentParser.
    parser = parser.parse_args(args)
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(parser.weights_folder, exist_ok=True)

    # Create the data loaders
    if parser.dataset == 'coco':

        if parser.coco_path is None:
            raise ValueError('Must provide --coco_path when training on COCO,')

        dataset_train = CocoDataset(parser.coco_path, set_name='train2017',
                                    transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()]))
        dataset_val = CocoDataset(parser.coco_path, set_name='val2017',
                                  transform=transforms.Compose([Normalizer(), Resizer()]))

    elif parser.dataset == 'csv':

        # These messages used to say "COCO" although this is the CSV branch.
        if parser.csv_train is None:
            raise ValueError('Must provide --csv_train when training on CSV,')

        if parser.csv_classes is None:
            raise ValueError('Must provide --csv_classes when training on CSV,')

        dataset_train = CSVDataset(train_file=parser.csv_train, class_list=parser.csv_classes,
                                   transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()]))

        if parser.csv_val is None:
            dataset_val = None
            print('No validation annotations provided.')
        else:
            dataset_val = CSVDataset(train_file=parser.csv_val, class_list=parser.csv_classes,
                                     transform=transforms.Compose([Normalizer(), Resizer()]))

    else:
        raise ValueError('Dataset type not understood (must be csv or coco), exiting.')

    sampler = AspectRatioBasedSampler(dataset_train, batch_size=5, drop_last=False)
    dataloader_train = DataLoader(dataset_train, num_workers=4, collate_fn=collater, batch_sampler=sampler)

    # The validation loader is built but not consumed below; evaluation is
    # handed ``dataset_val`` directly.
    if dataset_val is not None:
        sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=8, drop_last=False)
        dataloader_val = DataLoader(dataset_val, num_workers=4, collate_fn=collater, batch_sampler=sampler_val)

    # Create the model
    if parser.depth == 18:
        retinanet = model.resnet18(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 34:
        retinanet = model.resnet34(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 50:
        retinanet = model.resnet50(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 101:
        retinanet = model.resnet101(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 152:
        retinanet = model.resnet152(num_classes=dataset_train.num_classes(), pretrained=True)
    else:
        raise ValueError('Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    use_gpu = True

    if use_gpu:
        if torch.cuda.is_available():
            retinanet = retinanet.cuda()

    # Always wrap in DataParallel so ``retinanet.module`` is valid below.
    if torch.cuda.is_available():
        retinanet = torch.nn.DataParallel(retinanet).cuda()
    else:
        retinanet = torch.nn.DataParallel(retinanet)

    retinanet.training = True

    optimizer = optim.Adam(retinanet.parameters(), lr=1e-5)

    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, verbose=True)

    loss_hist = collections.deque(maxlen=500)

    retinanet.train()
    retinanet.module.freeze_bn()

    print('Num training images: {}'.format(len(dataset_train)))
    for epoch_num in range(parser.epochs):

        # Evaluation at the end of the previous epoch may have changed the
        # mode, so training mode is re-applied every epoch.
        retinanet.train()
        retinanet.module.freeze_bn()

        epoch_loss = []
        # Totals are plain Python floats: summing the loss tensors themselves
        # would carry their autograd history across the whole epoch.
        total_loss = 0.0
        total_regression_loss = 0.0
        total_classification_loss = 0.0
        with tqdm(dataloader_train, unit="batch") as tepoch:
            for data in tepoch:
                tepoch.set_description(f"Epoch {epoch_num}")
                # Best effort: a failing batch is reported and skipped rather
                # than aborting the whole run.
                try:
                    optimizer.zero_grad()

                    if torch.cuda.is_available():
                        classification_loss, regression_loss = retinanet([data['img'].cuda().float(), data['annot']])
                    else:
                        classification_loss, regression_loss = retinanet([data['img'].float(), data['annot']])

                    classification_loss = classification_loss.mean()
                    regression_loss = regression_loss.mean()

                    loss = classification_loss + regression_loss

                    total_loss += float(loss)
                    total_regression_loss += float(regression_loss)
                    total_classification_loss += float(classification_loss)

                    # Nothing to learn from a batch whose loss is exactly zero.
                    if bool(loss == 0):
                        continue

                    loss.backward()

                    torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)

                    optimizer.step()

                    loss_hist.append(float(loss))

                    epoch_loss.append(float(loss))

                    # The former per-batch time.sleep(0.1) only throttled
                    # training and has been dropped.
                    tepoch.set_postfix(cls_loss="{:1.5f}".format(float(classification_loss)),
                                       reg_loss="{:1.5f}".format(float(regression_loss)))
                    del classification_loss
                    del regression_loss
                except Exception as e:
                    print(e)
                    continue
        # NOTE(review): ``tb`` is not defined in this function; presumably a
        # module-level summary writer -- confirm against the rest of the file.
        tb.add_scalar('Training loss', total_loss, epoch_num)
        tb.add_scalar('Training regression loss', total_regression_loss, epoch_num)
        tb.add_scalar('Training accuracy loss', total_classification_loss, epoch_num)
        if parser.dataset == 'coco':

            print('Evaluating dataset')

            coco_eval.evaluate_coco(dataset_val, retinanet)

        elif parser.dataset == 'csv' and parser.csv_val is not None:

            print('Evaluating dataset')

            csv_eval.evaluate(dataset_val, retinanet)

        # If every batch was skipped the mean of an empty list is NaN, which
        # the plateau scheduler would count as a bad epoch; skip the step.
        if epoch_loss:
            scheduler.step(np.mean(epoch_loss))

        torch.save(retinanet.module,
                   os.path.join(parser.weights_folder,
                                '{}_retinanet_{}.pt'.format(parser.dataset, epoch_num)))

    retinanet.eval()

    torch.save(retinanet, os.path.join(parser.weights_folder, 'model_final.pt'))
Exemplo n.º 21
0
def main(args=None):
    """Train a RetinaNet detector with checkpoint resume and scalar logging.

    Parses the command-line options, builds the COCO or CSV datasets
    (optionally with extra experimental augmentations), creates a ResNet-backed
    RetinaNet, optionally restores a saved checkpoint, and trains up to
    ``--epochs`` epochs. Each epoch logs the mean losses and learning rate,
    evaluates when validation data exists, steps the plateau scheduler and
    saves a checkpoint into ``--snapshots``.

    Args:
        args: Optional list of argument strings. ``None`` lets argparse read
            ``sys.argv``.

    Returns:
        None. Returns early (after printing a message) when ``--resume`` is
        given with a missing/invalid checkpoint or when the checkpoint has
        already reached ``--epochs``.

    Raises:
        ValueError: If the dataset type is not ``csv``/``coco``, a required
            path option is missing, or ``--depth`` is unsupported.
    """
    parser = argparse.ArgumentParser(description='Simple training script for training a RetinaNet network.')

    parser.add_argument('--dataset', help='Dataset type, must be one of csv or coco.')
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument('--csv_train', help='Path to file containing training annotations (see readme)')
    parser.add_argument('--csv_classes', help='Path to file containing class list (see readme)')
    parser.add_argument('--csv_val', help='Path to file containing validation annotations (optional, see readme)')

    parser.add_argument('--depth', help='Resnet depth, must be one of 18, 34, 50, 101, 152', type=int, default=50)
    parser.add_argument('--config', help='Config file path that contains scale and ratio values', type=str)
    parser.add_argument('--epochs', help='Number of epochs', type=int, default=50)
    parser.add_argument('--init-lr', help='Initial learning rate for training process', type=float, default=1e-3)
    parser.add_argument('--batch-size', help='Number of input images per step', type=int, default=1)
    parser.add_argument('--num-workers', help='Number of worker used in dataloader', type=int, default=1)

    # For resuming training from saved checkpoint
    parser.add_argument('--resume', help='Whether to resume training from checkpoint', action='store_true')
    parser.add_argument('--saved-ckpt', help='Resume training from this checkpoint', type=str)

    parser.add_argument('--multi-gpus', help='Allow to use multi gpus for training task', action='store_true')
    parser.add_argument('--snapshots', help='Location to save training snapshots', type=str, default="snapshots")

    parser.add_argument('--log-dir', help='Location to save training logs', type=str, default="logs")
    parser.add_argument('--expr-augs', help='Allow to use use experiment augmentation methods', action='store_true')
    parser.add_argument('--aug-methods', help='(Experiment) Augmentation methods to use, separate by comma symbol', type=str, default="rotate,hflip,brightness,contrast")
    parser.add_argument('--aug-prob', help='Probability of applying (experiment) augmentation in range [0.,1.]', type=float, default=0.5)

    # From here on ``parser`` is the parsed namespace, not the ArgumentParser.
    parser = parser.parse_args(args)

    train_transforms = [Normalizer(), Resizer(), Augmenter()]

    # Define transform methods
    if parser.expr_augs:
        aug_map = get_aug_map(p=parser.aug_prob)
        for aug in parser.aug_methods.split(","):
            if aug in aug_map:
                train_transforms.append(aug_map[aug])
            else:
                print(f"{aug} is not available.")

    # Create the data loaders
    if parser.dataset == 'coco':

        if parser.coco_path is None:
            raise ValueError('Must provide --coco_path when training on COCO,')

        dataset_train = CocoDataset(parser.coco_path, set_name='train2017',
                                    transform=transforms.Compose(train_transforms))
        dataset_val = CocoDataset(parser.coco_path, set_name='val2017',
                                  transform=transforms.Compose([Normalizer(), Resizer()]))

    elif parser.dataset == 'csv':

        # These messages used to say "COCO" although this is the CSV branch.
        if parser.csv_train is None:
            raise ValueError('Must provide --csv_train when training on CSV,')

        if parser.csv_classes is None:
            raise ValueError('Must provide --csv_classes when training on CSV,')

        dataset_train = CSVDataset(train_file=parser.csv_train, class_list=parser.csv_classes,
                                   transform=transforms.Compose(train_transforms))

        if parser.csv_val is None:
            dataset_val = None
            print('No validation annotations provided.')
        else:
            dataset_val = CSVDataset(train_file=parser.csv_val, class_list=parser.csv_classes,
                                     transform=transforms.Compose([Normalizer(), Resizer()]))

    else:
        raise ValueError('Dataset type not understood (must be csv or coco), exiting.')

    sampler = AspectRatioBasedSampler(dataset_train, batch_size=parser.batch_size, drop_last=False)
    dataloader_train = DataLoader(dataset_train, num_workers=parser.num_workers, collate_fn=collater, batch_sampler=sampler)

    # The validation loader is built but not consumed below; evaluation is
    # handed ``dataset_val`` directly.
    if dataset_val is not None:
        sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=parser.batch_size, drop_last=False)
        dataloader_val = DataLoader(dataset_val, num_workers=parser.num_workers, collate_fn=collater, batch_sampler=sampler_val)

    # ``None`` for both entries is what the model constructors receive when no
    # --config file is given.
    config = dict({"scales": None,
                   "ratios": None})

    if parser.config:
        config = load_config(parser.config, config)

    if parser.depth == 18:
        retinanet = model.resnet18(num_classes=dataset_train.num_classes(), pretrained=True, ratios=config["ratios"], scales=config["scales"])
    elif parser.depth == 34:
        retinanet = model.resnet34(num_classes=dataset_train.num_classes(), pretrained=True, ratios=config["ratios"], scales=config["scales"])
    elif parser.depth == 50:
        retinanet = model.resnet50(num_classes=dataset_train.num_classes(), pretrained=True, ratios=config["ratios"], scales=config["scales"])
    elif parser.depth == 101:
        retinanet = model.resnet101(num_classes=dataset_train.num_classes(), pretrained=True, ratios=config["ratios"], scales=config["scales"])
    elif parser.depth == 152:
        retinanet = model.resnet152(num_classes=dataset_train.num_classes(), pretrained=True, ratios=config["ratios"], scales=config["scales"])
    else:
        raise ValueError('Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    optimizer = optim.Adam(retinanet.parameters(), lr=parser.init_lr)

    start_epoch = 0
    if parser.resume:
        if not parser.saved_ckpt:
            print("No saved checkpoint provided for resuming training. Exiting now...")
            return
        if not os.path.exists(parser.saved_ckpt):
            print("Invalid saved checkpoint path. Exiting now...")
            return

        # Restore last state
        retinanet, optimizer, start_epoch = load_ckpt(parser.saved_ckpt, retinanet, optimizer)
        if parser.epochs <= start_epoch:
            print("Number of epochs must be higher than number of trained epochs of saved checkpoint.")
            return

    use_gpu = True

    # The model is wrapped in DataParallel on every path: the code below
    # relies on ``retinanet.module``, which previously did not exist when
    # CUDA was unavailable.
    if use_gpu and torch.cuda.is_available():
        print("Using GPU for training process")
        if parser.multi_gpus:
            print("Using multi-gpus for training process")
            retinanet = torch.nn.DataParallel(retinanet.cuda(), device_ids=[0, 1])
        else:
            retinanet = torch.nn.DataParallel(retinanet.cuda())
    else:
        retinanet = torch.nn.DataParallel(retinanet)

    retinanet.training = True

    # With CSV validation data the scheduler is stepped with mAP (higher is
    # better), otherwise with the training loss (lower is better); the plateau
    # mode has to match the monitored quantity.
    monitor_map = parser.dataset == 'csv' and parser.csv_val is not None
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode='max' if monitor_map else 'min', patience=1, verbose=True)

    # Window of the most recent losses, used only for the "Running loss" print.
    loss_hist = collections.deque(maxlen=500)

    retinanet.train()
    retinanet.module.freeze_bn()

    print('Num training images: {}'.format(len(dataset_train)))

    # Tensorboard writer
    writer = SummaryWriter(parser.log_dir)

    # Save snapshots dir (exist_ok avoids the exists()/makedirs() race).
    os.makedirs(parser.snapshots, exist_ok=True)

    best_mAP = 0

    for epoch_num in range(start_epoch, parser.epochs):

        # Evaluation at the end of the previous epoch may have changed the
        # mode, so training mode is re-applied every epoch.
        retinanet.train()
        retinanet.module.freeze_bn()

        epoch_loss = []
        epoch_csf_loss = []
        epoch_reg_loss = []

        for iter_num, data in enumerate(dataloader_train):
            # Best effort: a failing batch is reported and skipped rather
            # than aborting the whole run.
            try:
                optimizer.zero_grad()

                if torch.cuda.is_available():
                    with torch.cuda.device(0):
                        classification_loss, regression_loss = retinanet([data['img'].cuda().float(), data['annot']])
                else:
                    classification_loss, regression_loss = retinanet([data['img'].float(), data['annot']])

                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()

                loss = classification_loss + regression_loss
                epoch_csf_loss.append(float(classification_loss))
                epoch_reg_loss.append(float(regression_loss))

                # Nothing to learn from a batch whose loss is exactly zero.
                if bool(loss == 0):
                    continue

                loss.backward()

                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)

                optimizer.step()

                loss_hist.append(float(loss))

                epoch_loss.append(float(loss))

                print(
                    '\rEpoch: {}/{} | Iteration: {}/{} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'.format(
                        (epoch_num + 1), parser.epochs, (iter_num + 1), len(dataloader_train), float(classification_loss), float(regression_loss), np.mean(loss_hist)), end='')

                del classification_loss
                del regression_loss
            except Exception as e:
                print(e)
                continue

        _epoch_loss = np.mean(epoch_loss)
        # Was computed from epoch_reg_loss, so the logged classification
        # loss was really the regression loss.
        _epoch_csf_loss = np.mean(epoch_csf_loss)
        _epoch_reg_loss = np.mean(epoch_reg_loss)

        # Stays None when no mAP is computed this epoch (COCO, or CSV without
        # validation data); previously that left the name unbound.
        mAP = None

        if parser.dataset == 'coco':

            print('Evaluating dataset')

            coco_eval.evaluate_coco(dataset_val, retinanet)

            scheduler.step(_epoch_loss)

        elif monitor_map:

            print('\nEvaluating dataset')

            APs = csv_eval.evaluate(dataset_val, retinanet)
            mAP = round(mean(APs[ap][0] for ap in APs), 5)
            print("mAP: %f" % mAP)
            writer.add_scalar("validate/mAP", mAP, epoch_num)

            # Handle lr_scheduler with mAP value
            scheduler.step(mAP)

        else:
            # No validation metric available: fall back to the training loss
            # so the learning rate can still decay.
            scheduler.step(_epoch_loss)

        lr = get_lr(optimizer)
        writer.add_scalar("train/classification-loss", _epoch_csf_loss, epoch_num)
        writer.add_scalar("train/regression-loss", _epoch_reg_loss, epoch_num)
        writer.add_scalar("train/loss", _epoch_loss, epoch_num)
        writer.add_scalar("train/learning-rate", lr, epoch_num)

        # Save model file, optimizer and epoch number
        checkpoint = {
            'epoch': epoch_num,
            'state_dict': retinanet.state_dict(),
            'optimizer': optimizer.state_dict(),
        }

        # Check whether this epoch's model achieves highest mAP value
        is_best = mAP is not None and best_mAP < mAP
        if is_best:
            best_mAP = mAP

        save_ckpt(checkpoint, is_best, parser.snapshots, '{}_retinanet_{}.pt'.format(parser.dataset, epoch_num + 1))

        print('\n')

    retinanet.eval()

    torch.save(retinanet, 'model_final.pt')

    writer.flush()
    writer.close()
Exemplo n.º 22
0
def main(args=None):
    """Train a RetinaNet detector, then train its style-classification head.

    The run has three stages:

    1. Detection training for ``--epochs_detection`` epochs, optimising the
       sum of the classification, regression and style losses.
    2. A no-grad pass over the training set that caches the feature vector
       returned by the network in ``style_inference`` mode together with the
       style label of each sample.
    3. Training of ``styleClassificationModel`` on the cached features for
       ``--epochs_classification`` epochs.

    Files written to the current directory: ``model_best.pt``,
    ``model_best_classif.pt``, ``model_final.pt`` and one
    ``{dataset}_retinanet_{epoch}.pt`` per epoch.

    Args:
        args: Optional list of command-line tokens; ``None`` lets argparse
            read ``sys.argv``.

    Raises:
        ValueError: If the dataset type or ResNet depth is unsupported, or a
            required path argument is missing.
    """
    parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')

    parser.add_argument('--dataset',
                        help='Dataset type, must be one of csv or coco.',
                        default='csv')
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument(
        '--csv_train',
        help='Path to file containing training annotations (see readme)',
        default='data/train_retinanet.csv')
    parser.add_argument('--csv_classes',
                        help='Path to file containing class list (see readme)',
                        default='data/class_retinanet.csv')
    parser.add_argument(
        '--csv_val',
        help=
        'Path to file containing validation annotations (optional, see readme)',
        default='data/val_retinanet.csv')

    parser.add_argument('--model_path',
                        default='coco_resnet_50_map_0_335_state_dict.pt',
                        help='Path to file containing pretrained retinanet')

    parser.add_argument(
        '--depth',
        help='Resnet depth, must be one of 18, 34, 50, 101, 152',
        type=int,
        default=50)
    parser.add_argument('--epochs_detection',
                        help='Number of epochs for detection',
                        type=int,
                        default=50)
    parser.add_argument('--epochs_classification',
                        help='Number of epochs for classification',
                        type=int,
                        default=50)

    parser = parser.parse_args(args)

    # ------------------------------------------------------------------
    # Datasets
    # ------------------------------------------------------------------
    if parser.dataset == 'coco':

        if parser.coco_path is None:
            raise ValueError('Must provide --coco_path when training on COCO,')

        dataset_train = CocoDataset(parser.coco_path,
                                    set_name='train2017',
                                    transform=transforms.Compose(
                                        [Normalizer(),
                                         Augmenter(),
                                         Resizer()]))
        dataset_val = CocoDataset(parser.coco_path,
                                  set_name='val2017',
                                  transform=transforms.Compose(
                                      [Normalizer(), Resizer()]))

    elif parser.dataset == 'csv':

        if parser.csv_train is None:
            raise ValueError('Must provide --csv_train when training on CSV.')

        if parser.csv_classes is None:
            raise ValueError(
                'Must provide --csv_classes when training on CSV.')

        dataset_train = CSVDataset(train_file=parser.csv_train,
                                   class_list=parser.csv_classes,
                                   transform=transforms.Compose(
                                       [Normalizer(),
                                        Augmenter(),
                                        Resizer()]))

        if parser.csv_val is None:
            dataset_val = None
            print('No validation annotations provided.')
        else:
            dataset_val = CSVDataset(train_file=parser.csv_val,
                                     class_list=parser.csv_classes,
                                     transform=transforms.Compose(
                                         [Normalizer(),
                                          Resizer()]))

    else:
        raise ValueError(
            'Dataset type not understood (must be csv or coco), exiting.')

    # Batches of one image: the feature-caching stage below relies on this
    # when it reads data['style'][0] as the label of the whole batch.
    sampler = AspectRatioBasedSampler(dataset_train,
                                      batch_size=1,
                                      drop_last=False)
    dataloader_train = DataLoader(dataset_train,
                                  num_workers=3,
                                  collate_fn=collater,
                                  batch_sampler=sampler)

    # ------------------------------------------------------------------
    # Model
    # ------------------------------------------------------------------
    builders = {
        18: model.resnet18,
        34: model.resnet34,
        50: model.resnet50,
        101: model.resnet101,
        152: model.resnet152,
    }
    if parser.depth not in builders:
        raise ValueError(
            'Unsupported model depth, must be one of 18, 34, 50, 101, 152')
    retinanet = builders[parser.depth](
        num_classes=dataset_train.num_classes(), pretrained=True)

    if parser.model_path is not None:
        print('loading ', parser.model_path)
        # NOTE(review): torch.load unpickles arbitrary objects; only point
        # --model_path at files from a trusted source.
        if 'coco' in parser.model_path:
            # A path containing "coco" is treated as a state dict; strict=False
            # tolerates keys that do not match this model.
            retinanet.load_state_dict(torch.load(parser.model_path),
                                      strict=False)
        else:
            # Any other path is treated as a fully pickled model object.
            retinanet = torch.load(parser.model_path)
        print('Pretrained model loaded!')

    use_cuda = torch.cuda.is_available()
    if use_cuda:
        retinanet = retinanet.cuda()
    retinanet = torch.nn.DataParallel(retinanet)

    def to_device(tensor):
        """Move *tensor* to the GPU when one is available."""
        return tensor.cuda() if use_cuda else tensor

    # ------------------------------------------------------------------
    # Stage 1: detection training
    # ------------------------------------------------------------------
    optimizer = optim.Adam(retinanet.parameters(), lr=1e-5)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     patience=4,
                                                     verbose=True)
    loss_hist = collections.deque(maxlen=500)
    loss_style_classif = nn.CrossEntropyLoss()

    retinanet.train()
    retinanet.module.freeze_bn()

    print('Num training images: {}'.format(len(dataset_train)))
    mAP_list = []
    mAPbest = 0
    best_saved = False  # True once model_best.pt was written by this run
    for epoch_num in range(parser.epochs_detection):

        retinanet.train()
        retinanet.module.freeze_bn()

        epoch_loss = []

        for iter_num, data in enumerate(dataloader_train):
            try:
                optimizer.zero_grad()

                [classification_loss, regression_loss], style = retinanet(
                    [to_device(data['img']).float(), data['annot']])

                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()
                style_loss = loss_style_classif(
                    style, to_device(torch.tensor(data['style'])))
                loss = classification_loss + regression_loss + style_loss

                if bool(loss == 0):
                    continue

                loss.backward()
                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)
                optimizer.step()
                loss_hist.append(float(loss))
                epoch_loss.append(float(loss))

                print(
                    'Epoch: {} | Iteration: {} | Classification loss: {:1.4f} | Regression loss: {:1.4f} | Style loss: {:1.4f} | Running loss: {:1.4f}'
                    .format(epoch_num, iter_num, float(classification_loss),
                            float(regression_loss), float(style_loss),
                            np.mean(loss_hist)))

                del classification_loss
                del regression_loss
                del style_loss
            except Exception as e:
                # Best effort: a failing batch is reported and skipped.
                print(e)
                continue

        # mAP stays None when no CSV validation is run, so the comparison
        # below can never hit an undefined name.
        mAP = None
        if parser.dataset == 'coco':
            print('Evaluating dataset')
            coco_eval.evaluate_coco(dataset_val, retinanet)

        elif parser.dataset == 'csv' and parser.csv_val is not None:
            print('Evaluating dataset')
            _, mAP, _ = csv_eval.evaluate(dataset_val, retinanet)
            mAP_list.append(mAP)
            print('mAP_list', mAP_list)

        if mAP is not None and mAP > mAPbest:
            print('Saving best checkpoint')
            torch.save(retinanet, 'model_best.pt')
            mAPbest = mAP
            best_saved = True

        # np.mean([]) is NaN; skip the scheduler when every batch was skipped.
        if epoch_loss:
            scheduler.step(np.mean(epoch_loss))
        torch.save(retinanet.module,
                   '{}_retinanet_{}.pt'.format(parser.dataset, epoch_num))

    retinanet.eval()
    torch.save(retinanet, 'model_final.pt')

    # ------------------------------------------------------------------
    # Stage 2: cache style features so stage 3 does not run the detector
    # ------------------------------------------------------------------
    if best_saved:
        retinanet.load_state_dict(torch.load('model_best.pt').state_dict())
    else:
        # No validation metric ever improved on 0 (or none was computed):
        # continue with the weights from the last detection epoch rather than
        # loading a missing or stale model_best.pt.
        print('No best detection checkpoint was saved; using final weights.')

    features = []
    style_targets = []
    retinanet.eval()
    retinanet.module.style_inference = True
    retinanet.module.freeze_bn()

    with torch.no_grad():
        for data in dataloader_train:
            try:
                _, _, feature_vec = retinanet(to_device(data['img']).float())
                features.append(torch.squeeze(feature_vec).cpu())
                style_targets.append(data['style'][0])
            except Exception as e:
                print(e)
                continue
    print('END of preparation of the data for classification of style')

    # ------------------------------------------------------------------
    # Stage 3: style-classifier training on the cached features
    # ------------------------------------------------------------------
    batch_size_classification = 64
    dataloader_train_style = torch.utils.data.DataLoader(
        StyleDataset(features, style_targets),
        batch_size=batch_size_classification)

    # The weights are unchanged since the reload above (stage 2 ran in eval
    # mode under no_grad), so no second reload is needed here.
    retinanet.module.style_inference = False
    retinanet.module.style_train(True)
    retinanet.train()
    retinanet.module.freeze_bn()

    # Only the style head is optimised in this stage.
    optimizer = optim.Adam(
        retinanet.module.styleClassificationModel.parameters(),
        lr=5e-3,
        weight_decay=1e-3)
    # mode='max' because the scheduler is stepped with an accuracy.
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     mode='max',
                                                     patience=4,
                                                     verbose=True)
    loss_hist = collections.deque(maxlen=500)
    criterion = nn.CrossEntropyLoss()
    accu_list = []
    accubest = 0
    num_style_batches = len(dataloader_train_style)
    for epoch_num in range(parser.epochs_classification):

        retinanet.train()
        retinanet.module.freeze_bn()
        epoch_loss = []
        total = 0
        correct = 0
        for iter_num, data in enumerate(dataloader_train_style):
            try:
                optimizer.zero_grad()
                inputs, targets = data
                inputs, targets = to_device(inputs), to_device(targets)

                outputs = retinanet.module.styleClassificationModel(
                    inputs, 0, 0, 0, True)
                loss = criterion(outputs, targets)
                loss.backward()
                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)
                optimizer.step()
                loss_hist.append(float(loss))
                epoch_loss.append(float(loss))
                total += targets.size(0)
                _, predicted = torch.max(outputs.data, 1)
                correct += predicted.eq(targets.data).sum().item()

                # len(DataLoader) is already the number of batches.
                print(
                    '| Epoch [%3d/%3d] Iter[%3d/%3d]\t\tLoss: %.4f Acc@1: %.3f%%'
                    %
                    (epoch_num, parser.epochs_classification, iter_num + 1,
                     num_style_batches, loss.item(), 100. * correct / total))

            except Exception as e:
                print(e)
                continue

        # accu stays None when no CSV validation is run.
        accu = None
        if parser.dataset == 'coco':
            print('Evaluating dataset')
            coco_eval.evaluate_coco(dataset_val, retinanet)

        elif parser.dataset == 'csv' and parser.csv_val is not None:
            print('Evaluating dataset')
            _, _, accu = csv_eval.evaluate(dataset_val, retinanet)
            accu_list.append(accu)
            print('mAP_list', mAP_list, 'accu_list', accu_list)

        if accu is not None:
            if accu > accubest:
                print('Saving best checkpoint')
                torch.save(retinanet.module, 'model_best_classif.pt')
                accubest = accu
            scheduler.step(accu)

        # NOTE(review): same file-name pattern as stage 1, so these files
        # overwrite the detection checkpoints of equal epoch index.
        torch.save(retinanet.module,
                   '{}_retinanet_{}.pt'.format(parser.dataset, epoch_num))

    retinanet.eval()
    torch.save(retinanet.module, 'model_final.pt')
Exemplo n.º 23
0
def main(args=None):
    """Train a RetinaNet detector and log progress to TensorBoard.

    Parses the command line, builds the COCO or CSV datasets, trains for
    ``--epochs`` epochs, evaluates after every epoch, and writes one
    checkpoint per epoch plus a final model into ``checkpoints/``.
    Scalars are logged through a ``SummaryWriter`` rooted at ``logs``.

    Args:
        args: Optional list of command-line tokens; ``None`` lets argparse
            read ``sys.argv``.

    Raises:
        ValueError: If the dataset type or ResNet depth is unsupported, or a
            required path argument is missing.
    """
    import os  # local import: only needed for checkpoint path handling

    parser = argparse.ArgumentParser(
        description="Simple training script for training a RetinaNet network.")

    parser.add_argument("--dataset",
                        help="Dataset type, must be one of csv or coco.")
    parser.add_argument("--coco_path", help="Path to COCO directory")
    parser.add_argument(
        "--csv_train",
        help="Path to file containing training annotations (see readme)")
    parser.add_argument("--csv_classes",
                        help="Path to file containing class list (see readme)")
    parser.add_argument(
        "--csv_val",
        help=
        "Path to file containing validation annotations (optional, see readme)",
    )
    parser.add_argument(
        "--depth",
        help="Resnet depth, must be one of 18, 34, 50, 101, 152",
        type=int,
        default=50,
    )
    parser.add_argument("--batch_size", help="Batch size", type=int, default=2)
    parser.add_argument("--epochs",
                        help="Number of epochs",
                        type=int,
                        default=100)
    parser.add_argument("--workers",
                        help="Number of workers of dataleader",
                        type=int,
                        default=4)
    parser = parser.parse_args(args)

    writer = SummaryWriter("logs")

    # Create the datasets
    if parser.dataset == "coco":

        if parser.coco_path is None:
            raise ValueError("Must provide --coco_path when training on COCO,")

        dataset_train = CocoDataset(
            parser.coco_path,
            set_name="train2017",
            transform=transforms.Compose(
                [Normalizer(), Augmenter(),
                 Resizer()]),
        )
        dataset_val = CocoDataset(
            parser.coco_path,
            set_name="val2017",
            transform=transforms.Compose([Normalizer(),
                                          Resizer()]),
        )

    elif parser.dataset == "csv":

        if parser.csv_train is None:
            raise ValueError("Must provide --csv_train when training on CSV.")

        if parser.csv_classes is None:
            raise ValueError(
                "Must provide --csv_classes when training on CSV.")

        dataset_train = CSVDataset(
            train_file=parser.csv_train,
            class_list=parser.csv_classes,
            transform=transforms.Compose(
                [Normalizer(), Augmenter(),
                 Resizer()]),
        )

        if parser.csv_val is None:
            dataset_val = None
            print("No validation annotations provided.")
        else:
            dataset_val = CSVDataset(
                train_file=parser.csv_val,
                class_list=parser.csv_classes,
                transform=transforms.Compose([Normalizer(),
                                              Resizer()]),
            )

    else:
        raise ValueError(
            "Dataset type not understood (must be csv or coco), exiting.")

    sampler = AspectRatioBasedSampler(dataset_train,
                                      batch_size=parser.batch_size,
                                      drop_last=False)
    dataloader_train = DataLoader(
        dataset_train,
        num_workers=parser.workers,
        collate_fn=collater,
        batch_sampler=sampler,
    )

    # Create the model
    builders = {
        18: model.resnet18,
        34: model.resnet34,
        50: model.resnet50,
        101: model.resnet101,
        152: model.resnet152,
    }
    if parser.depth not in builders:
        raise ValueError(
            "Unsupported model depth, must be one of 18, 34, 50, 101, 152")
    retinanet = builders[parser.depth](
        num_classes=dataset_train.num_classes(), pretrained=True)

    # Only move to the GPU when one exists so the script also runs on CPU.
    use_cuda = torch.cuda.is_available()
    if use_cuda:
        retinanet = retinanet.cuda()
    retinanet = torch.nn.DataParallel(retinanet)

    optimizer = optim.Adam(retinanet.parameters(), lr=1e-5)

    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     patience=10,
                                                     verbose=True)

    loss_hist = collections.deque(maxlen=500)

    retinanet.train()
    retinanet.module.freeze_bn()

    print("Num training images: {}".format(len(dataset_train)))

    # torch.save does not create missing directories.
    checkpoint_dir = "checkpoints"
    os.makedirs(checkpoint_dir, exist_ok=True)

    global_step = 0
    for epoch_num in range(parser.epochs):

        retinanet.train()
        retinanet.module.freeze_bn()

        epoch_loss = []

        for iter_num, data in enumerate(dataloader_train):
            # Monotonic step index across epochs, used as the TensorBoard x-axis.
            global_step = iter_num + epoch_num * len(dataloader_train)

            try:
                optimizer.zero_grad()

                images = data["img"].cuda() if use_cuda else data["img"]
                classification_loss, regression_loss = retinanet(
                    [images.float(), data["annot"]])

                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()

                loss = classification_loss + regression_loss

                if bool(loss == 0):
                    continue

                loss.backward()

                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)

                optimizer.step()

                loss_hist.append(float(loss))

                epoch_loss.append(float(loss))

                if iter_num % 10 == 0:
                    print(
                        "Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}"
                        .format(
                            epoch_num,
                            iter_num,
                            float(classification_loss),
                            float(regression_loss),
                            np.mean(loss_hist),
                        ))

                    # Log plain floats so no autograd tensor is handed to the
                    # writer.
                    writer.add_scalars(
                        "training",
                        {
                            "loss": float(loss),
                            "loss_cls": float(classification_loss),
                            "loss_reg": float(regression_loss),
                        },
                        global_step,
                    )

                del classification_loss
                del regression_loss
            except Exception as e:
                # Best effort: a failing batch is reported and skipped.
                print(e)
                continue

        if parser.dataset == "coco":

            print("Evaluating dataset")

            coco_eval.evaluate_coco(dataset_val, retinanet)

        elif parser.dataset == "csv" and parser.csv_val is not None:

            print("Evaluating dataset")

            average_precisions = csv_eval.evaluate(dataset_val, retinanet)

            # Each value is indexed as (AP, count); entries whose count is
            # zero are excluded and the mean is taken over the remaining ones
            # only (the denominator used to be the number of all entries).
            valid_mAP = [x[0] for x in average_precisions.values() if x[1] > 0]
            mmAP = sum(valid_mAP) / len(valid_mAP) if valid_mAP else 0.0
            writer.add_scalars("validation", {"mmAP": mmAP}, global_step)

        # np.mean([]) is NaN; skip the scheduler when every batch was skipped.
        if epoch_loss:
            scheduler.step(np.mean(epoch_loss))

        torch.save(
            retinanet.module,
            os.path.join(
                checkpoint_dir,
                "{}_retinanet_{}.pt".format(parser.dataset, epoch_num)),
        )

    retinanet.eval()

    # The final file used to be written as "odel_final.pt" (typo).
    torch.save(retinanet, os.path.join(checkpoint_dir, "model_final.pt"))

    # Flush pending events and release the log file.
    writer.close()
Exemplo n.º 24
0
def main(args=None):
    """Train a RetinaNet detector on a chosen set of GPUs.

    The network returns raw outputs (classification, regression, anchors,
    annotations) and the loss is computed outside it with ``FocalLoss``.
    After every epoch the state dict is written to
    ``<saved_dir>/best_model.pt`` whenever the running loss reaches a new
    minimum, the model is evaluated, and a per-epoch checkpoint is saved.

    Args:
        args: Optional list of command-line tokens; ``None`` lets argparse
            read ``sys.argv``.

    Raises:
        ValueError: If the dataset type or ResNet depth is unsupported, or a
            required path argument is missing.
    """
    import os  # local import: only needed for checkpoint path handling

    parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')

    parser.add_argument('--dataset',
                        help='Dataset type, must be one of csv or coco.')
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument(
        '--csv_train',
        help='Path to file containing training annotations (see readme)')
    parser.add_argument('--csv_classes',
                        help='Path to file containing class list (see readme)')
    parser.add_argument(
        '--csv_val',
        help=
        'Path to file containing validation annotations (optional, see readme)'
    )

    parser.add_argument(
        '--depth',
        help='Resnet depth, must be one of 18, 34, 50, 101, 152',
        type=int,
        default=50)
    parser.add_argument('--epochs',
                        help='Number of epochs',
                        type=int,
                        default=150)
    parser.add_argument('--gpu_num', help='default gpu', type=int, default=5)
    parser.add_argument('--saved_dir',
                        help='saved dir',
                        default='trained_models/coco/resnet50/')

    parser = parser.parse_args(args)

    # Change the GPU allocation
    GPU_NUM = parser.gpu_num
    use_cuda = torch.cuda.is_available()
    device = torch.device(f'cuda:{GPU_NUM}' if use_cuda else 'cpu')
    print(device)
    # GPUs used for data parallelism, in order of preference.
    device_ids = [5, 4, 3, 1, 7]
    if use_cuda:
        # The torch.cuda calls below are only valid when CUDA is present.
        torch.cuda.set_device(device)  # change allocation of current GPU
        print('Current cuda device ', torch.cuda.current_device())  # check
        # DataParallel expects the module on device_ids[0], so the selected
        # GPU goes first; ids that do not exist on this host are dropped.
        gpu_count = torch.cuda.device_count()
        device_ids = [GPU_NUM] + [
            i for i in device_ids if i != GPU_NUM and i < gpu_count
        ]

    # Create the datasets
    if parser.dataset == 'coco':

        if parser.coco_path is None:
            raise ValueError('Must provide --coco_path when training on COCO,')

        dataset_train = CocoDataset(parser.coco_path,
                                    set_name='train2017',
                                    transform=transforms.Compose(
                                        [Normalizer(),
                                         Augmenter(),
                                         Resizer()]))
        dataset_val = CocoDataset(parser.coco_path,
                                  set_name='val2017',
                                  transform=transforms.Compose(
                                      [Normalizer(), Resizer()]))

    elif parser.dataset == 'csv':

        if parser.csv_train is None:
            raise ValueError('Must provide --csv_train when training on CSV.')

        if parser.csv_classes is None:
            raise ValueError(
                'Must provide --csv_classes when training on CSV.')

        dataset_train = CSVDataset(train_file=parser.csv_train,
                                   class_list=parser.csv_classes,
                                   transform=transforms.Compose(
                                       [Normalizer(),
                                        Augmenter(),
                                        Resizer()]))

        if parser.csv_val is None:
            dataset_val = None
            print('No validation annotations provided.')
        else:
            dataset_val = CSVDataset(train_file=parser.csv_val,
                                     class_list=parser.csv_classes,
                                     transform=transforms.Compose(
                                         [Normalizer(),
                                          Resizer()]))

    else:
        raise ValueError(
            'Dataset type not understood (must be csv or coco), exiting.')

    sampler = AspectRatioBasedSampler(dataset_train,
                                      batch_size=8,
                                      drop_last=False)
    dataloader_train = DataLoader(dataset_train,
                                  num_workers=8,
                                  collate_fn=collater,
                                  batch_sampler=sampler)

    # Create the model
    builders = {
        18: model.resnet18,
        34: model.resnet34,
        50: model.resnet50,
        101: model.resnet101,
        152: model.resnet152,
    }
    if parser.depth not in builders:
        raise ValueError(
            'Unsupported model depth, must be one of 18, 34, 50, 101, 152')
    retinanet = builders[parser.depth](
        num_classes=dataset_train.num_classes(),
        device=device,
        pretrained=True)

    if use_cuda:
        retinanet = torch.nn.DataParallel(retinanet.to(device),
                                          device_ids=device_ids,
                                          output_device=GPU_NUM)
    else:
        retinanet = torch.nn.DataParallel(retinanet)

    optimizer = optim.Adam(retinanet.parameters(), lr=1e-5)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     patience=3,
                                                     verbose=True)
    criterion = FocalLoss(device).to(device)

    loss_hist = collections.deque(maxlen=500)

    retinanet.train()
    retinanet.module.freeze_bn()

    print('Num training images: {}'.format(len(dataset_train)))

    # torch.save does not create missing directories.
    if parser.saved_dir:
        os.makedirs(parser.saved_dir, exist_ok=True)
    best_model_path = os.path.join(parser.saved_dir, 'best_model.pt')

    # Lowest running loss seen so far. This must live outside the epoch loop:
    # resetting it every epoch made each epoch overwrite the "best" model.
    best_loss = float('inf')

    for epoch_num in range(parser.epochs):

        retinanet.train()
        retinanet.module.freeze_bn()

        epoch_loss = []

        start_time = time.time()
        for iter_num, data in enumerate(dataloader_train):
            try:
                optimizer.zero_grad()

                # .to(device) is a no-op on CPU, so one call covers both cases.
                outputs = retinanet(
                    [data['img'].to(device).float(), data['annot']])

                classification, regression, anchors, annotations = outputs
                classification_loss, regression_loss = criterion(
                    classification, regression, anchors, annotations)

                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()

                loss = classification_loss + regression_loss

                if bool(loss == 0):
                    continue

                loss.backward()

                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)

                optimizer.step()

                loss_hist.append(float(loss))

                epoch_loss.append(float(loss))
                if iter_num % 500 == 0:
                    print(
                        'Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'
                        .format(epoch_num, iter_num,
                                float(classification_loss),
                                float(regression_loss), np.mean(loss_hist)))

                del classification_loss
                del regression_loss
            except Exception as e:
                # Best effort: a failing batch is reported and skipped.
                print(e)
                continue

        print('epoch time :', time.time() - start_time)

        # Mean over the most recent (up to 500) iteration losses.
        if loss_hist:
            running_loss = np.mean(loss_hist)
            if running_loss < best_loss:
                print('best model is saved')
                torch.save(retinanet.state_dict(), best_model_path)
                best_loss = running_loss

        if parser.dataset == 'coco':

            print('Evaluating dataset')

            coco_eval.evaluate_coco(dataset_val, retinanet)

        elif parser.dataset == 'csv' and parser.csv_val is not None:

            print('Evaluating dataset')

            csv_eval.evaluate(dataset_val, retinanet)

        # np.mean([]) is NaN; skip the scheduler when every batch was skipped.
        if epoch_loss:
            scheduler.step(np.mean(epoch_loss))

        torch.save(retinanet.module,
                   '{}_retinanet_{}.pt'.format(parser.dataset, epoch_num))

    retinanet.eval()

    torch.save(retinanet, 'model_final.pt')
Exemplo n.º 25
0
def main(args=None):
    """Train a RetinaNet model, tracking train/validation loss per epoch.

    Parses the command line, builds the COCO or CSV datasets and loaders,
    creates a RetinaNet on the requested ResNet depth, then for every epoch
    runs a training pass, a validation-loss pass (when a validation set
    exists) and the dataset evaluation. The unwrapped model is saved every
    10 epochs and the wrapped model once more after the last epoch.

    Args:
        args: Optional list of argument strings. ``None`` lets argparse read
            the process command line.

    Raises:
        ValueError: If ``--dataset`` is neither ``csv`` nor ``coco``, if a
            path required by the chosen dataset type is missing, or if
            ``--depth`` is not one of 18, 34, 50, 101, 152.
    """
    import os  # function-scope import: only used for checkpoint paths

    parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')

    parser.add_argument('--dataset',
                        help='Dataset type, must be one of csv or coco.')
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument(
        '--csv_train',
        help='Path to file containing training annotations (see readme)')
    parser.add_argument('--csv_classes',
                        help='Path to file containing class list (see readme)')
    parser.add_argument(
        '--csv_val',
        help=
        'Path to file containing validation annotations (optional, see readme)'
    )
    parser.add_argument('--model_save_path',
                        help='Path to save model',
                        type=str)

    parser.add_argument(
        '--depth',
        help='Resnet depth, must be one of 18, 34, 50, 101, 152',
        type=int,
        default=50)
    parser.add_argument('--epochs',
                        help='Number of epochs',
                        type=int,
                        default=100)

    parser = parser.parse_args(args)

    # Create the datasets
    if parser.dataset == 'coco':

        if parser.coco_path is None:
            raise ValueError('Must provide --coco_path when training on COCO,')

        dataset_train = CocoDataset(parser.coco_path,
                                    set_name='train2017',
                                    transform=transforms.Compose(
                                        [Normalizer(),
                                         Augmenter(),
                                         Resizer()]))
        dataset_val = CocoDataset(parser.coco_path,
                                  set_name='val2017',
                                  transform=transforms.Compose(
                                      [Normalizer(), Resizer()]))

    elif parser.dataset == 'csv':

        # These messages used to say "COCO", which pointed users at the
        # wrong dataset type.
        if parser.csv_train is None:
            raise ValueError('Must provide --csv_train when training on CSV,')

        if parser.csv_classes is None:
            raise ValueError(
                'Must provide --csv_classes when training on CSV,')

        dataset_train = CSVDataset(train_file=parser.csv_train,
                                   class_list=parser.csv_classes,
                                   transform=transforms.Compose(
                                       [Normalizer(),
                                        Augmenter(),
                                        Resizer()]))

        if parser.csv_val is None:
            dataset_val = None
            print('No validation annotations provided.')
        else:
            dataset_val = CSVDataset(train_file=parser.csv_val,
                                     class_list=parser.csv_classes,
                                     transform=transforms.Compose(
                                         [Normalizer(),
                                          Resizer()]))
    else:
        raise ValueError(
            'Dataset type not understood (must be csv or coco), exiting.')

    # Create the data loaders
    sampler = AspectRatioBasedSampler(dataset_train,
                                      batch_size=8,
                                      drop_last=False)
    dataloader_train = DataLoader(dataset_train,
                                  num_workers=3,
                                  collate_fn=collater,
                                  batch_sampler=sampler)

    # Always bind the name so the validation pass can be skipped cleanly
    # when no validation set was given (it used to raise NameError).
    dataloader_val = None
    if dataset_val is not None:
        sampler_val = AspectRatioBasedSampler(dataset_val,
                                              batch_size=1,
                                              drop_last=False)
        dataloader_val = DataLoader(dataset_val,
                                    num_workers=3,
                                    collate_fn=collater,
                                    batch_sampler=sampler_val)

    # Create the model: look up the constructor for the requested depth.
    if parser.depth not in (18, 34, 50, 101, 152):
        raise ValueError(
            'Unsupported model depth, must be one of 18, 34, 50, 101, 152')
    build_model = getattr(model, 'resnet{}'.format(parser.depth))
    retinanet = build_model(num_classes=dataset_train.num_classes(),
                            pretrained=True)

    on_gpu = torch.cuda.is_available()
    if on_gpu:
        retinanet = retinanet.cuda()
    retinanet = torch.nn.DataParallel(retinanet)

    optimizer = optim.Adam(retinanet.parameters(), lr=1e-5)

    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     patience=3,
                                                     verbose=True)

    # Separate windows so validation losses no longer leak into the
    # "Running loss" printed during training.
    loss_hist = collections.deque(maxlen=500)
    valid_loss_hist = collections.deque(maxlen=500)

    retinanet.train()
    retinanet.module.freeze_bn()

    print('Num training images: {}'.format(len(dataset_train)))

    # An unset --model_save_path used to crash at the first checkpoint with
    # `None + '/'`; fall back to the current directory and make sure the
    # target exists before any training time is spent.
    save_dir = parser.model_save_path or '.'
    os.makedirs(save_dir, exist_ok=True)

    writer = SummaryWriter(log_dir='./logs/416*416/', flush_secs=60)

    def mean_losses(batch):
        """Run one forward pass and return mean (classification, regression) loss."""
        if on_gpu:
            images = batch['img'].cuda().float()
        else:
            images = batch['img'].float()
        classification_loss, regression_loss = retinanet(
            [images, batch['annot']])
        return classification_loss.mean(), regression_loss.mean()

    try:
        for epoch_num in range(parser.epochs):

            retinanet.train()
            retinanet.module.freeze_bn()

            epoch_loss = []

            # Training pass
            for iter_num, data in enumerate(dataloader_train):
                optimizer.zero_grad()

                classification_loss, regression_loss = mean_losses(data)
                loss = classification_loss + regression_loss

                # A zero loss has nothing to backpropagate; skip the batch.
                if bool(loss == 0):
                    continue

                loss.backward()

                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)

                optimizer.step()

                loss_hist.append(float(loss))
                epoch_loss.append(float(loss))

                print(
                    'Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'
                    .format(epoch_num, iter_num, float(classification_loss),
                            float(regression_loss), np.mean(loss_hist)))

                del classification_loss
                del regression_loss

            # Validation-loss pass. The model stays in the mode the training
            # pass used, exactly as before, because the forward call is
            # expected to return losses here.
            # NOTE(review): confirm the model only returns losses in
            # training mode.
            # no_grad avoids building autograd graphs that are never used.
            epoch_valid_loss = []
            if dataloader_val is not None:
                with torch.no_grad():
                    for iter_num, data in enumerate(dataloader_val):
                        classification_loss, regression_loss = mean_losses(
                            data)
                        loss = classification_loss + regression_loss

                        if bool(loss == 0):
                            continue

                        valid_loss_hist.append(float(loss))
                        epoch_valid_loss.append(float(loss))

                        print(
                            'Epoch: {} | Iteration: {} | Valid-Classification loss: {:1.5f} | Valid-Regression loss: {:1.5f} | Running Valid loss: {:1.5f}'
                            .format(epoch_num, iter_num,
                                    float(classification_loss),
                                    float(regression_loss),
                                    np.mean(valid_loss_hist)))

                        del classification_loss
                        del regression_loss

            # Reset every epoch so a stale or missing value can never reach
            # the checkpoint file name.
            mAP = None

            if parser.dataset == 'coco':

                print('Evaluating dataset')

                coco_eval.evaluate_coco(dataset_val, retinanet)

            elif parser.dataset == 'csv' and parser.csv_val is not None:

                print('Evaluating dataset')

                # NOTE(review): assumes csv_eval.evaluate returns a single
                # number here (the original also called float() on it).
                mAP = float(csv_eval.evaluate(dataset_val, retinanet))
                print('Epoch: {} | mAP: {:.3f}'.format(epoch_num, mAP))

            # Stepping the plateau scheduler on the mean of an empty list
            # would feed it NaN and count as a "bad" epoch.
            if epoch_loss:
                mean_epoch_loss = float(np.mean(epoch_loss))
                scheduler.step(mean_epoch_loss)
                writer.add_scalar('Loss/train_epoch', mean_epoch_loss,
                                  epoch_num)
            if epoch_valid_loss:
                writer.add_scalar('Loss/valid_epoch',
                                  float(np.mean(epoch_valid_loss)), epoch_num)

            # Checkpoint every 10th epoch (1-based count).
            if (epoch_num + 1) % 10 == 0:
                if mAP is None:
                    checkpoint_name = '{}_retinanet_{}.pt'.format(
                        parser.dataset, epoch_num)
                else:
                    checkpoint_name = '{}_retinanet_{}_{:.3f}.pt'.format(
                        parser.dataset, epoch_num, mAP)
                torch.save(retinanet.module,
                           os.path.join(save_dir, checkpoint_name))
    finally:
        # Flush and release the event file even if training is interrupted.
        writer.close()

    retinanet.eval()

    torch.save(retinanet, os.path.join(save_dir, 'model_final.pt'))
Exemplo n.º 26
0
def main(args=None):
    """Train a RetinaNet model, logging losses to TensorBoard.

    Parses the command line, builds the COCO or CSV datasets and loaders,
    creates a RetinaNet on the requested ResNet depth and trains it for
    ``--epochs`` epochs. Per-iteration losses are written through a
    ``SummaryWriter``; the unwrapped model is saved into ``--models_out``
    after every epoch and the wrapped model once more at the end.

    Args:
        args: Optional list of argument strings. ``None`` lets argparse read
            the process command line.

    Raises:
        ValueError: If ``--models_out`` is missing, if ``--dataset`` is
            neither ``csv`` nor ``coco``, if a path required by the chosen
            dataset type is missing, or if ``--depth`` is not one of
            18, 34, 50, 101, 152.
    """
    parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')

    parser.add_argument('--dataset',
                        help='Dataset type, must be one of csv or coco.',
                        default='csv')
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument(
        '--csv_train',
        help='Path to file containing training annotations (see readme)')
    parser.add_argument('--csv_classes',
                        help='Path to file containing class list (see readme)')
    parser.add_argument(
        '--csv_val',
        help=
        'Path to file containing validation annotations (optional, see readme)'
    )

    parser.add_argument(
        '--depth',
        help='Resnet depth, must be one of 18, 34, 50, 101, 152',
        type=int,
        default=50)
    parser.add_argument('--epochs',
                        help='Number of epochs',
                        type=int,
                        default=100)
    parser.add_argument('--batch_size', help='Batch size', type=int, default=2)
    parser.add_argument('--num_workers',
                        help='Number of workers',
                        type=int,
                        default=4)
    parser.add_argument('--models_out',
                        help='The directory to save models',
                        type=str)

    parser = parser.parse_args(args)

    # Every checkpoint path and the TensorBoard log dir are built from this
    # value; without it the script used to die with an opaque TypeError.
    if parser.models_out is None:
        raise ValueError(
            'Must provide --models_out (the directory to save models).')

    # exist_ok avoids the race between an existence check and the creation.
    os.makedirs(parser.models_out, exist_ok=True)

    # Create the datasets
    if parser.dataset == 'coco':

        if parser.coco_path is None:
            raise ValueError('Must provide --coco_path when training on COCO,')

        dataset_train = CocoDataset(parser.coco_path,
                                    set_name='train2017',
                                    transform=transforms.Compose(
                                        [Normalizer(),
                                         Augmenter(),
                                         Resizer()]))
        dataset_val = CocoDataset(parser.coco_path,
                                  set_name='val2017',
                                  transform=transforms.Compose(
                                      [Normalizer(), Resizer()]))

    elif parser.dataset == 'csv':

        # These messages used to say "COCO", which pointed users at the
        # wrong dataset type.
        if parser.csv_train is None:
            raise ValueError('Must provide --csv_train when training on CSV,')

        if parser.csv_classes is None:
            raise ValueError(
                'Must provide --csv_classes when training on CSV,')

        dataset_train = CSVDataset(train_file=parser.csv_train,
                                   class_list=parser.csv_classes,
                                   transform=transforms.Compose(
                                       [Normalizer(),
                                        Augmenter(),
                                        Resizer()]))

        if parser.csv_val is None:
            dataset_val = None
            print('No validation annotations provided.')
        else:
            dataset_val = CSVDataset(train_file=parser.csv_val,
                                     class_list=parser.csv_classes,
                                     transform=transforms.Compose(
                                         [Normalizer(),
                                          Resizer()]))

    else:
        raise ValueError(
            'Dataset type not understood (must be csv or coco), exiting.')

    # Create the data loaders
    sampler = AspectRatioBasedSampler(dataset_train,
                                      batch_size=parser.batch_size,
                                      drop_last=False)
    dataloader_train = DataLoader(dataset_train,
                                  num_workers=parser.num_workers,
                                  collate_fn=collater,
                                  batch_sampler=sampler)

    if dataset_val is not None:
        sampler_val = AspectRatioBasedSampler(dataset_val,
                                              batch_size=1,
                                              drop_last=False)
        dataloader_val = DataLoader(dataset_val,
                                    num_workers=parser.num_workers,
                                    collate_fn=collater,
                                    batch_sampler=sampler_val)

    # Create the model: look up the constructor for the requested depth.
    if parser.depth not in (18, 34, 50, 101, 152):
        raise ValueError(
            'Unsupported model depth, must be one of 18, 34, 50, 101, 152')
    build_model = getattr(model, 'resnet{}'.format(parser.depth))
    retinanet = build_model(num_classes=dataset_train.num_classes(),
                            pretrained=True)

    # The unconditional .cuda() calls used to crash on hosts without CUDA;
    # only move to the GPU when one is actually available.
    on_gpu = torch.cuda.is_available()
    if on_gpu:
        retinanet = retinanet.cuda()
    retinanet = torch.nn.DataParallel(retinanet)

    optimizer = optim.Adam(retinanet.parameters(), lr=1e-5)

    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     patience=3,
                                                     verbose=True)

    loss_hist = collections.deque(maxlen=500)

    retinanet.train()
    retinanet.module.freeze_bn()

    print('Num training images: {}'.format(len(dataset_train)))
    writer = SummaryWriter(log_dir="tensor_log/" + parser.models_out)

    global_steps = 0
    try:
        for epoch_num in range(parser.epochs):

            retinanet.train()
            retinanet.module.freeze_bn()

            epoch_loss = []

            for iter_num, data in enumerate(dataloader_train):
                # Best-effort training: a failing batch is reported and
                # skipped rather than aborting the whole run.
                try:
                    optimizer.zero_grad()

                    if on_gpu:
                        images = data['img'].cuda().float()
                    else:
                        images = data['img'].float()

                    classification_loss, regression_loss = retinanet(
                        [images, data['annot']])

                    classification_loss = classification_loss.mean()
                    regression_loss = regression_loss.mean()

                    loss = classification_loss + regression_loss

                    # A zero loss has nothing to backpropagate.
                    if bool(loss == 0):
                        continue

                    loss.backward()

                    torch.nn.utils.clip_grad_norm_(retinanet.parameters(),
                                                   0.1)

                    optimizer.step()

                    loss_hist.append(float(loss))
                    epoch_loss.append(float(loss))

                    running_loss = np.mean(loss_hist)
                    print(
                        'Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'
                        .format(epoch_num, iter_num,
                                float(classification_loss),
                                float(regression_loss), running_loss))
                    global_steps += 1
                    writer.add_scalar("Loss/Classification",
                                      float(classification_loss),
                                      global_steps)
                    writer.add_scalar("Loss/Regression",
                                      float(regression_loss), global_steps)
                    writer.add_scalar("Loss/Running", running_loss,
                                      global_steps)

                    del classification_loss
                    del regression_loss
                except Exception as e:
                    print(e)
                    continue

            if parser.dataset == 'coco':

                print('Evaluating dataset')

                coco_eval.evaluate_coco(dataset_val, retinanet)

            elif parser.dataset == 'csv' and parser.csv_val is not None:

                print('Evaluating dataset')

                csv_eval.evaluate(dataset_val, retinanet)

            # If every batch was skipped or failed, the mean of the empty
            # list is NaN; feeding that to the plateau scheduler would count
            # as a "bad" epoch and could decay the learning rate spuriously.
            if epoch_loss:
                scheduler.step(np.mean(epoch_loss))

            torch.save(
                retinanet.module,
                os.path.join(
                    parser.models_out,
                    '{}_retinanet_{}.pt'.format(parser.dataset, epoch_num)))
    finally:
        # Flush and release the event file even if training is interrupted.
        writer.close()

    retinanet.eval()

    torch.save(retinanet, os.path.join(parser.models_out, 'model_final.pt'))
Exemplo n.º 27
0
def main(args=None):
    """Fine-tune the heads of a COCO-trained RetinaNet on a new dataset.

    Parses the command line, builds the COCO or CSV datasets and loaders,
    loads an 80-class ResNet-50 RetinaNet state dict from ``--model``,
    freezes every loaded parameter, replaces the regression and
    classification heads with freshly initialised ones sized for the
    training dataset, and trains only those heads. The unwrapped model is
    saved every 5 epochs and once more after the last epoch.

    ``--depth`` and ``--finetune`` are accepted but not used by this script.

    Args:
        args: Optional list of argument strings. ``None`` lets argparse read
            the process command line.

    Raises:
        ValueError: If ``--dataset`` is neither ``csv`` nor ``coco``, if a
            path required by the chosen dataset type is missing, or if
            ``--model`` is missing.
    """

    def str2bool(value):
        """Parse a command-line boolean.

        ``type=bool`` would treat every non-empty string, including
        "False", as True.
        """
        if isinstance(value, bool):
            return value
        lowered = value.strip().lower()
        if lowered in ('true', 't', 'yes', 'y', '1'):
            return True
        if lowered in ('false', 'f', 'no', 'n', '0', ''):
            return False
        raise argparse.ArgumentTypeError(
            'expected a boolean value, got {!r}'.format(value))

    parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')

    parser.add_argument('--dataset',
                        help='Dataset type, must be one of csv or coco.')
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument(
        '--csv_train',
        help='Path to file containing training annotations (see readme)')
    parser.add_argument('--csv_classes',
                        help='Path to file containing class list (see readme)')
    parser.add_argument(
        '--csv_val',
        help=
        'Path to file containing validation annotations (optional, see readme)'
    )

    parser.add_argument(
        '--depth',
        help='Resnet depth, must be one of 18, 34, 50, 101, 152',
        type=int,
        default=50)
    parser.add_argument('--epochs',
                        help='Number of epochs',
                        type=int,
                        default=100)
    parser.add_argument('--model', help='Path to model (.pt) file.')

    parser.add_argument('--finetune',
                        help='if load trained retina model',
                        type=str2bool,
                        default=False)
    parser.add_argument('--gpu', help='', type=str2bool, default=False)
    parser.add_argument('--batch_size', help='', type=int, default=2)

    parser = parser.parse_args(args)

    # Create the datasets
    if parser.dataset == 'coco':

        if parser.coco_path is None:
            raise ValueError('Must provide --coco_path when training on COCO,')

        dataset_train = CocoDataset(parser.coco_path,
                                    set_name='train2017',
                                    transform=transforms.Compose(
                                        [Normalizer(),
                                         Augmenter(),
                                         Resizer()]))
        dataset_val = CocoDataset(parser.coco_path,
                                  set_name='val2017',
                                  transform=transforms.Compose(
                                      [Normalizer(), Resizer()]))

    elif parser.dataset == 'csv':

        # These messages used to say "COCO", which pointed users at the
        # wrong dataset type.
        if parser.csv_train is None:
            raise ValueError('Must provide --csv_train when training on CSV,')

        if parser.csv_classes is None:
            raise ValueError(
                'Must provide --csv_classes when training on CSV,')

        dataset_train = CSVDataset(train_file=parser.csv_train,
                                   class_list=parser.csv_classes,
                                   transform=transforms.Compose(
                                       [Normalizer(),
                                        Augmenter(),
                                        Resizer()]))

        if parser.csv_val is None:
            dataset_val = None
            print('No validation annotations provided.')
        else:
            dataset_val = CSVDataset(train_file=parser.csv_val,
                                     class_list=parser.csv_classes,
                                     transform=transforms.Compose(
                                         [Normalizer(),
                                          Resizer()]))

    else:
        raise ValueError(
            'Dataset type not understood (must be csv or coco), exiting.')

    # Create the data loaders
    sampler = AspectRatioBasedSampler(dataset_train,
                                      parser.batch_size,
                                      drop_last=False)
    dataloader_train = DataLoader(dataset_train,
                                  num_workers=3,
                                  collate_fn=collater,
                                  batch_sampler=sampler)

    if dataset_val is not None:
        sampler_val = AspectRatioBasedSampler(dataset_val,
                                              batch_size=1,
                                              drop_last=False)
        dataloader_val = DataLoader(dataset_val,
                                    num_workers=3,
                                    collate_fn=collater,
                                    batch_sampler=sampler_val)

    use_gpu = parser.gpu
    on_gpu = bool(use_gpu) and torch.cuda.is_available()

    # Load the COCO-pretrained model.
    if parser.model is None:
        raise ValueError(
            'Must provide --model (path to the pretrained .pt state dict).')

    # load_state_dict is strict by default, so it replaces every parameter
    # and buffer; fetching ImageNet backbone weights first is wasted work.
    # NOTE(review): assumes `pretrained` only selects the initial backbone
    # weights -- confirm against model.resnet50.
    retinanet = model.resnet50(num_classes=80, pretrained=False)
    # map_location lets a checkpoint saved on a GPU be restored on a
    # CPU-only host; the model is moved to the GPU afterwards if requested.
    retinanet.load_state_dict(torch.load(parser.model, map_location='cpu'))

    # Freeze everything that was loaded; only the new heads are trained.
    for param in retinanet.parameters():
        param.requires_grad = False

    retinanet.regressionModel = model.RegressionModel(256)
    retinanet.classificationModel = model.ClassificationModel(
        256, num_classes=dataset_train.num_classes())

    # Output layers of both heads start at zero weight. The classification
    # bias is set to -log((1 - prior) / prior), so the initial logit
    # corresponds to a sigmoid probability of `prior`.
    prior = 0.01
    retinanet.classificationModel.output.weight.data.fill_(0)
    retinanet.classificationModel.output.bias.data.fill_(-math.log(
        (1.0 - prior) / prior))

    retinanet.regressionModel.output.weight.data.fill_(0)
    retinanet.regressionModel.output.bias.data.fill_(0)

    if on_gpu:
        retinanet = retinanet.cuda()
    retinanet = torch.nn.DataParallel(retinanet)

    # Only the two replacement heads are handed to the optimizer.
    optimizer = optim.Adam(
        [{
            'params': retinanet.module.regressionModel.parameters()
        }, {
            'params': retinanet.module.classificationModel.parameters()
        }],
        lr=1e-6)

    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     patience=3,
                                                     verbose=True)

    loss_hist = collections.deque(maxlen=500)

    retinanet.train()
    retinanet.module.freeze_bn()

    print('Num training images: {}'.format(len(dataset_train)))

    for epoch_num in range(parser.epochs):

        retinanet.train()
        retinanet.module.freeze_bn()

        epoch_loss = []

        for iter_num, data in enumerate(dataloader_train):
            # Best-effort training: a failing batch is reported and skipped
            # rather than aborting the whole run.
            try:
                optimizer.zero_grad()

                if on_gpu:
                    classification_loss, regression_loss = retinanet(
                        [data['img'].cuda().float(), data['annot'].cuda()])
                else:
                    classification_loss, regression_loss = retinanet(
                        [data['img'].float(), data['annot']])

                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()

                loss = classification_loss + regression_loss

                # A zero loss has nothing to backpropagate.
                if bool(loss == 0):
                    continue

                loss.backward()

                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)

                optimizer.step()

                loss_hist.append(float(loss))
                epoch_loss.append(float(loss))

                print(
                    'Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'
                    .format(epoch_num, iter_num, float(classification_loss),
                            float(regression_loss), np.mean(loss_hist)))

                del classification_loss
                del regression_loss
            except Exception as e:
                print(e)
                continue

        if parser.dataset == 'coco':

            print('Evaluating dataset')

            coco_eval.evaluate_coco(dataset_val, retinanet)

        elif parser.dataset == 'csv' and parser.csv_val is not None:

            print('Evaluating dataset')

            csv_eval.evaluate(dataset_val, retinanet)

        # If every batch was skipped or failed, the mean of the empty list
        # is NaN; feeding that to the plateau scheduler would count as a
        # "bad" epoch and could decay the learning rate spuriously.
        if epoch_loss:
            scheduler.step(np.mean(epoch_loss))

        if epoch_num % 5 == 0:
            torch.save(
                retinanet.module,
                '{}_freezinetune_{}.pt'.format(parser.dataset, epoch_num))

    retinanet.eval()

    # Periodic checkpoints only cover every 5th epoch, so without this the
    # weights from the final epochs were never written to disk.
    torch.save(retinanet.module,
               '{}_freezinetune_final.pt'.format(parser.dataset))
Exemplo n.º 28
0
def _str2bool(value):
    """Convert a command-line token into a real boolean.

    ``argparse`` with ``type=bool`` calls ``bool()`` on the raw string, so any
    non-empty text (including ``"False"``) becomes ``True``.  This converter
    interprets the text instead.

    Raises:
        argparse.ArgumentTypeError: If the token is not a recognised boolean.
    """
    if isinstance(value, bool):
        return value
    token = value.strip().lower()
    if token in ('true', 't', 'yes', 'y', '1'):
        return True
    if token in ('false', 'f', 'no', 'n', '0', ''):
        return False
    raise argparse.ArgumentTypeError(
        'Boolean value expected, got {!r}'.format(value))


def main(args=None):
    """Train a RetinaNet model on a COCO or CSV dataset.

    Parses the command line, builds the datasets and the model (or reloads a
    saved model when ``--c`` is set), runs the training loop, records the
    per-epoch losses through ``SummaryWriter`` and saves checkpoints under
    ``./models``.

    Args:
        args: Optional list of command-line tokens forwarded to
            ``ArgumentParser.parse_args``.

    Raises:
        ValueError: On an unknown dataset type, a missing required path, an
            unsupported ``--depth``, or ``--c`` given without ``--model``.
    """
    # Function-scope import: only this block needs path helpers.
    import os

    parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')

    parser.add_argument('--dataset',
                        help='Dataset type, must be one of csv or coco.')
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument(
        '--csv_train',
        help='Path to file containing training annotations (see readme)')
    parser.add_argument('--csv_classes',
                        help='Path to file containing class list (see readme)')
    parser.add_argument(
        '--csv_val',
        help=
        'Path to file containing validation annotations (optional, see readme)'
    )

    parser.add_argument(
        '--depth',
        help='Resnet depth, must be one of 18, 34, 50, 101, 152',
        type=int,
        default=50)
    parser.add_argument('--epochs',
                        help='Number of epochs',
                        type=int,
                        default=100)

    # Boolean flags use _str2bool so that "--gpu False" really means False.
    parser.add_argument('--finetune',
                        help='if load trained retina model',
                        type=_str2bool,
                        default=False)
    parser.add_argument('--gpu', help='', type=_str2bool, default=False)
    parser.add_argument('--batch_size', help='', type=int, default=2)

    parser.add_argument('--c',
                        help='continue with formal model',
                        type=_str2bool,
                        default=False)
    parser.add_argument('--model', help='model path')

    parser = parser.parse_args(args)

    if parser.c and parser.model is None:
        raise ValueError('Must provide --model when continuing with --c')

    # Create the data loaders
    if parser.dataset == 'coco':

        if parser.coco_path is None:
            raise ValueError('Must provide --coco_path when training on COCO,')

        dataset_train = CocoDataset(parser.coco_path,
                                    set_name='train2017',
                                    transform=transforms.Compose(
                                        [Normalizer(),
                                         Augmenter(),
                                         Resizer()]))
        dataset_val = CocoDataset(parser.coco_path,
                                  set_name='val2017',
                                  transform=transforms.Compose(
                                      [Normalizer(), Resizer()]))

    elif parser.dataset == 'csv':

        if parser.csv_train is None:
            raise ValueError('Must provide --csv_train when training on CSV,')

        if parser.csv_classes is None:
            raise ValueError(
                'Must provide --csv_classes when training on CSV,')

        dataset_train = CSVDataset(train_file=parser.csv_train,
                                   class_list=parser.csv_classes,
                                   transform=transforms.Compose(
                                       [Normalizer(),
                                        Augmenter(),
                                        Resizer()]))

        if parser.csv_val is None:
            dataset_val = None
            print('No validation annotations provided.')
        else:
            dataset_val = CSVDataset(train_file=parser.csv_val,
                                     class_list=parser.csv_classes,
                                     transform=transforms.Compose(
                                         [Normalizer(),
                                          Resizer()]))

    else:
        raise ValueError(
            'Dataset type not understood (must be csv or coco), exiting.')

    sampler = AspectRatioBasedSampler(dataset_train,
                                      parser.batch_size,
                                      drop_last=False)
    dataloader_train = DataLoader(dataset_train,
                                  num_workers=16,
                                  collate_fn=collater,
                                  batch_sampler=sampler)

    # NOTE: the validation loader is built but evaluation below works on
    # dataset_val directly; kept so construction behaviour is unchanged.
    if dataset_val is not None:
        sampler_val = AspectRatioBasedSampler(dataset_val,
                                              batch_size=1,
                                              drop_last=False)
        dataloader_val = DataLoader(dataset_val,
                                    num_workers=8,
                                    collate_fn=collater,
                                    batch_sampler=sampler_val)

    # Validate the depth even when resuming, as the original flow did.
    builder_names = {
        18: 'resnet18',
        34: 'resnet34',
        50: 'resnet50',
        101: 'resnet101',
        152: 'resnet152',
    }
    if parser.depth not in builder_names:
        raise ValueError(
            'Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    epochpassed = 0
    if parser.c:
        # Resume: load the saved model and recover the epoch counter from the
        # trailing "_<epoch>" of the file name (extension and directories are
        # stripped first so any path layout works).
        retinanet = torch.load(parser.model)
        checkpoint_stem = os.path.splitext(os.path.basename(parser.model))[0]
        epochpassed = int(checkpoint_stem.split('_')[-1])
    else:
        # Create the model
        build_model = getattr(model, builder_names[parser.depth])
        retinanet = build_model(num_classes=dataset_train.num_classes(),
                                pretrained=True)

    use_gpu = parser.gpu
    on_gpu = use_gpu and torch.cuda.is_available()

    if on_gpu:
        retinanet = retinanet.cuda()

    # NOTE(review): when CUDA is available but --gpu is off, DataParallel may
    # still target the GPUs while the model stays on the CPU - confirm that
    # this combination is supported.
    retinanet = torch.nn.DataParallel(retinanet)

    optimizer = optim.Adam(retinanet.parameters(), lr=1e-5)  #original:1e-5

    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     patience=3,
                                                     verbose=True)

    # Sliding window over the most recent losses for the progress line.
    loss_hist = collections.deque(maxlen=500)

    retinanet.train()
    retinanet.module.freeze_bn()

    print('Num training images: {}'.format(len(dataset_train)))

    checkpoint_pattern = './models/{}_retinanet{}_highResolution4fold_{}.pt'
    # Make sure the checkpoint directory exists before the first save.
    os.makedirs('./models', exist_ok=True)

    writer = SummaryWriter()
    try:
        for epoch_num in range(parser.epochs):

            retinanet.train()
            retinanet.module.freeze_bn()

            epoch_loss = []
            epoch_classification_loss = []
            epoch_regression_loss = []

            for iter_num, data in enumerate(dataloader_train):
                # Best-effort training: a failing batch is reported and
                # skipped instead of aborting the whole run.
                try:
                    optimizer.zero_grad()

                    if on_gpu:
                        classification_loss, regression_loss = retinanet([
                            data['img'].cuda().float(),
                            data['annot'].cuda()
                        ])
                    else:
                        classification_loss, regression_loss = retinanet(
                            [data['img'].float(), data['annot']])

                    classification_loss = classification_loss.mean()
                    regression_loss = regression_loss.mean()

                    loss = classification_loss + regression_loss

                    # Nothing to learn from a zero loss; skip the update.
                    if bool(loss == 0):
                        continue

                    loss.backward()

                    torch.nn.utils.clip_grad_norm_(retinanet.parameters(),
                                                   0.1)

                    optimizer.step()

                    loss_hist.append(float(loss))

                    epoch_loss.append(float(loss))
                    epoch_classification_loss.append(
                        float(classification_loss))
                    epoch_regression_loss.append(float(regression_loss))

                    print(
                        'Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Epoch loss: {:1.5f}\r'
                        .format(epoch_num + epochpassed, iter_num,
                                float(classification_loss),
                                float(regression_loss), np.mean(loss_hist)),
                        end='')

                    del classification_loss
                    del regression_loss
                except Exception as e:
                    print(e)
                    continue

            global_epoch = epoch_num + epochpassed
            mean_classification = np.mean(epoch_classification_loss)
            mean_regression = np.mean(epoch_regression_loss)
            mean_loss = np.mean(epoch_loss)

            print(
                'Epoch: {}  | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Epoch loss: {:1.5f}'
                .format(global_epoch, mean_classification, mean_regression,
                        mean_loss))

            writer.add_scalar('lossrecord/regressionloss', mean_regression,
                              global_epoch)
            # Fixed: this tag used to be fed the regression loss.
            writer.add_scalar('lossrecord/classificationloss',
                              mean_classification, global_epoch)
            writer.add_scalar('lossrecord/epochloss', mean_loss, global_epoch)

            if parser.dataset == 'coco':

                print('Evaluating dataset')

                coco_eval.evaluate_coco(dataset_val, retinanet)

            elif parser.dataset == 'csv' and parser.csv_val is not None:

                print('Evaluating dataset')

                mAP = csv_eval.evaluate(dataset_val, retinanet)

            scheduler.step(mean_loss)

            if epoch_num % 10 == 0:
                torch.save(
                    retinanet.module,
                    checkpoint_pattern.format(parser.dataset, parser.depth,
                                              global_epoch))

        torch.save(
            retinanet.module,
            checkpoint_pattern.format(parser.dataset, parser.depth,
                                      parser.epochs + epochpassed))
    finally:
        # Always close the writer, even if training aborts.
        writer.close()
Exemplo n.º 29
0
# Input files shared by the cells below.
VALID_ANNOTATIONS_CSV = "./myvaliddataset.csv"
TRAIN_ANNOTATIONS_CSV = "./mytraindataset.csv"
CLASS_LIST_CSV = "./class_detail.csv"


def _make_csv_dataset(annotations_csv, *steps):
    """Build a CSVDataset over ``annotations_csv`` with the given transform steps."""
    return CSVDataset(train_file=annotations_csv,
                      class_list=CLASS_LIST_CSV,
                      transform=transforms.Compose(list(steps)))


# Validation data: normalised and resized, no augmentation step.
dataset_val = _make_csv_dataset(VALID_ANNOTATIONS_CSV, Normalizer(), Resizer())
sampler_val = AspectRatioBasedSampler(dataset_val,
                                      batch_size=4,
                                      drop_last=False)
dataloader_val = DataLoader(dataset_val,
                            num_workers=3,
                            collate_fn=collater,
                            batch_sampler=sampler_val)


# In[9]:


# Load the saved model and move it to the GPU.
retinanet = torch.load("./model_final_without_finetune2.pt").cuda()


# In[10]:


# Evaluate the loaded model on the validation dataset.
mAP = csv_eval.evaluate(dataset_val, retinanet)


# In[ ]:


epochs = 100


# In[ ]:


# Training data additionally goes through Augmenter(); the validation dataset
# is rebuilt alongside it.
dataset_train = _make_csv_dataset(TRAIN_ANNOTATIONS_CSV, Normalizer(),
                                  Augmenter(), Resizer())
dataset_val = _make_csv_dataset(VALID_ANNOTATIONS_CSV, Normalizer(), Resizer())
Exemplo n.º 30
0
def main(args=None):
    """Train a RetinaNet model, or continue training a saved one.

    Parses the command line, prepares the ``results`` directory tree, builds
    the datasets and the model (or loads it from ``--load_model_path``), then
    for every epoch runs a training pass, a validation-loss pass (only when a
    validation set exists), the dataset evaluation, TensorBoard logging and a
    checkpoint save.  The final model is written to ``model_final.pt``.

    Args:
        args: Optional list of command-line tokens forwarded to
            ``ArgumentParser.parse_args``.

    Raises:
        ValueError: On an unknown dataset type, a missing required path, or an
            unsupported backbone / depth combination.
    """
    parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')

    parser.add_argument("--load_model_path",
                        type=str,
                        default=None,
                        help="Path to model (.pt) file.")
    parser.add_argument('--dataset_type',
                        help='Dataset type, must be one of csv or coco.')
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument(
        '--csv_train',
        help='Path to file containing training annotations (see readme)')
    parser.add_argument('--csv_classes',
                        help='Path to file containing class list (see readme)')
    parser.add_argument(
        '--csv_val',
        help=
        'Path to file containing validation annotations (optional, see readme)'
    )

    parser.add_argument('--backbone',
                        help='Backbone choice: [ResNet, ResNeXt]',
                        type=str,
                        default='ResNet')
    parser.add_argument(
        '--depth',
        help='ResNet depth, must be one of 18, 34, 50, 101, 152',
        type=int,
        default=50)
    parser.add_argument('--epochs',
                        help='Number of epochs',
                        type=int,
                        default=100)
    parser.add_argument("--batch_size",
                        type=int,
                        default=2,
                        help="size of the batches")
    parser.add_argument("--lr",
                        type=float,
                        default=1e-5,
                        help="adam: learning rate")

    parser = parser.parse_args(args)

    results_dir = "results"
    save_images_dir = os.path.join(results_dir, "images")
    save_models_dir = os.path.join(results_dir, "saved_models")

    os.makedirs(results_dir, exist_ok=True)
    os.makedirs(save_images_dir, exist_ok=True)
    os.makedirs(save_models_dir, exist_ok=True)

    # Date (YYYYMMDD) and time (HHMMSS) stamps used in the run name.
    today = datetime.date.today().strftime("%Y%m%d")
    now = time.strftime("%H%M%S")

    # Backbone name
    backbone_name = parser.backbone + str(parser.depth)

    # Path the run name is derived from (set per dataset type below).
    dataset_path = ''

    # Create the data loaders
    if parser.dataset_type == 'coco':

        if parser.coco_path is None:
            raise ValueError('Must provide --coco_path when training on COCO,')

        dataset_train = CocoDataset(
            parser.coco_path,
            set_name='train',
            transform=transforms.Compose(
                [Normalizer(), AugmenterWithImgaug(),
                 Resizer()]))
        dataset_val = CocoDataset(parser.coco_path,
                                  set_name='val',
                                  transform=transforms.Compose(
                                      [Normalizer(), Resizer()]))

        dataset_path = parser.coco_path

    elif parser.dataset_type == 'csv':

        if parser.csv_train is None:
            raise ValueError('Must provide --csv_train when training on CSV,')

        if parser.csv_classes is None:
            raise ValueError(
                'Must provide --csv_classes when training on CSV,')

        dataset_train = CSVDataset(train_file=parser.csv_train,
                                   class_list=parser.csv_classes,
                                   transform=transforms.Compose(
                                       [Normalizer(),
                                        Augmenter(),
                                        Resizer()]))

        if parser.csv_val is None:
            dataset_val = None
            print('No validation annotations provided.')
        else:
            dataset_val = CSVDataset(train_file=parser.csv_val,
                                     class_list=parser.csv_classes,
                                     transform=transforms.Compose(
                                         [Normalizer(),
                                          Resizer()]))

        dataset_path = parser.csv_train

    else:
        raise ValueError(
            'Dataset type not understood (must be csv or coco), exiting.')

    sampler = AspectRatioBasedSampler(dataset_train,
                                      batch_size=parser.batch_size,
                                      drop_last=False)
    dataloader_train = DataLoader(dataset_train,
                                  num_workers=3,
                                  collate_fn=collater,
                                  batch_sampler=sampler)

    # Stays None without a validation set so the validation pass can be
    # skipped instead of failing on an undefined name.
    dataloader_val = None
    if dataset_val is not None:
        sampler_val = AspectRatioBasedSampler(dataset_val,
                                              batch_size=1,
                                              drop_last=False)
        dataloader_val = DataLoader(dataset_val,
                                    num_workers=3,
                                    collate_fn=collater,
                                    batch_sampler=sampler_val)

    # Retrain the model
    if parser.load_model_path is not None:
        # Load pretrained models
        print("\nLoading model from: [%s]" % parser.load_model_path)
        retinanet = torch.load(parser.load_model_path)
        print("\nStart retrain...")
    # Create the model
    else:
        print("\nStart train...")
        # Builder names are resolved lazily with getattr so only the selected
        # constructor has to exist on the model module.
        if parser.backbone == 'ResNet':
            builder_names = {
                18: 'resnet18',
                34: 'resnet34',
                50: 'resnet50',
                101: 'resnet101',
                152: 'resnet152',
            }
            if parser.depth not in builder_names:
                raise ValueError(
                    'Unsupported model depth, must be one of 18, 34, 50, 101, 152'
                )
        elif parser.backbone == 'ResNeXt':
            builder_names = {
                50: 'resnext50_32x4d',
                101: 'resnext101_32x8d',
            }
            if parser.depth not in builder_names:
                raise ValueError(
                    "Unsupported model depth, must be one of 50, 101")
        else:
            raise ValueError("Choice a backbone, [ResNet, ResNeXt]")

        build_model = getattr(model, builder_names[parser.depth])
        retinanet = build_model(num_classes=dataset_train.num_classes(),
                                pretrained=True)

    # Get dataset name (normpath drops a trailing separator so a directory
    # path such as "data/coco/" still yields "coco").
    dataset_name = os.path.basename(os.path.normpath(dataset_path))

    # Checkpoint name
    save_ckpt_name = r"%s_%s-%s-RetinaNet-backbone(%s)-ep(%d)-bs(%d)-lr(%s)" \
                     % (today, now, dataset_name, backbone_name, parser.epochs, parser.batch_size, parser.lr)

    os.makedirs(os.path.join(save_images_dir, "%s" % save_ckpt_name),
                exist_ok=True)
    os.makedirs(os.path.join(save_models_dir, "%s" % save_ckpt_name),
                exist_ok=True)

    tb_log_path = os.path.join("tf_log", save_ckpt_name)
    tb_writer = SummaryWriter(os.path.join(results_dir, tb_log_path))

    try:
        # This script always trains on the GPU.
        retinanet = torch.nn.DataParallel(retinanet.cuda())

        optimizer = optim.Adam(retinanet.parameters(), lr=parser.lr)

        scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                         patience=3,
                                                         verbose=True)

        # Sliding windows over recent losses for the progress display.
        loss_hist = collections.deque(maxlen=500)
        val_loss_hist = collections.deque(maxlen=500)

        retinanet.train()
        retinanet.module.freeze_bn()

        print('Num training images: {}'.format(len(dataset_train)))

        num_train_batches = len(dataloader_train)
        num_val_batches = (len(dataloader_val)
                           if dataloader_val is not None else 0)

        epoch_prev_time = time.time()
        for epoch_num in range(parser.epochs):

            retinanet.train()
            retinanet.module.freeze_bn()

            epoch_loss = []

            # Plain Python floats: adding the loss tensors themselves would
            # keep autograd history alive across the whole epoch.
            total_classification_loss = 0.0
            total_regression_loss = 0.0
            total_running_loss = 0.0

            total_val_classification_loss = 0.0
            total_val_regression_loss = 0.0
            total_val_running_loss = 0.0

            batch_prev_time = time.time()
            for iter_num, data in enumerate(dataloader_train):
                # Best-effort training: a failing batch is reported and
                # skipped instead of aborting the whole run.
                try:
                    optimizer.zero_grad()

                    classification_loss, regression_loss = retinanet(
                        [data['img'].cuda().float(), data['annot']])

                    classification_loss = classification_loss.mean()
                    regression_loss = regression_loss.mean()

                    loss = classification_loss + regression_loss

                    if bool(loss == 0):
                        continue

                    loss.backward()

                    torch.nn.utils.clip_grad_norm_(retinanet.parameters(),
                                                   0.1)

                    optimizer.step()

                    classification_value = float(classification_loss)
                    regression_value = float(regression_loss)
                    loss_value = float(loss)

                    loss_hist.append(loss_value)

                    epoch_loss.append(loss_value)

                    # sum the loss for tensorboard at this batch
                    total_regression_loss += regression_value
                    total_classification_loss += classification_value
                    total_running_loss += loss_value

                    # Determine approximate time left
                    data_done = iter_num
                    data_left = num_train_batches - data_done
                    batch_time_left = datetime.timedelta(
                        seconds=data_left * (time.time() - batch_prev_time))
                    batch_time_left = chop_microseconds(batch_time_left)

                    batches_done = epoch_num * num_train_batches + iter_num
                    batches_left = (parser.epochs * num_train_batches -
                                    batches_done)
                    total_time_left = datetime.timedelta(
                        seconds=batches_left *
                        (time.time() - epoch_prev_time))
                    total_time_left = chop_microseconds(total_time_left)

                    batch_prev_time = time.time()
                    epoch_prev_time = time.time()

                    # Print training step log
                    prefix_log = '[Epoch: {}/{}] | [Batch: {}/{}]'.format(
                        epoch_num + 1, parser.epochs, iter_num + 1,
                        num_train_batches)
                    suffix_log = '[Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}] ETA: {} / {}'.format(
                        classification_value, regression_value,
                        np.mean(loss_hist), batch_time_left, total_time_left)

                    printProgressBar(iteration=iter_num + 1,
                                     total=num_train_batches,
                                     prefix=prefix_log,
                                     suffix=suffix_log)

                    del classification_loss
                    del regression_loss
                except Exception as e:
                    print(e)
                    continue

            # Validation (skipped when no validation set was provided).  The
            # model is left in train mode here, as in the training pass, and
            # gradients are disabled.
            if dataloader_val is not None:
                with torch.no_grad():
                    val_batch_prev_time = time.time()
                    for iter_num, data in enumerate(dataloader_val):
                        try:

                            val_classification_loss, val_regression_loss = retinanet(
                                [data['img'].cuda().float(), data['annot']])

                            val_classification_loss = val_classification_loss.mean(
                            )
                            val_regression_loss = val_regression_loss.mean()

                            val_loss = val_classification_loss + val_regression_loss

                            if bool(val_loss == 0):
                                continue

                            val_classification_value = float(
                                val_classification_loss)
                            val_regression_value = float(val_regression_loss)
                            val_loss_value = float(val_loss)

                            val_loss_hist.append(val_loss_value)

                            # sum the loss for tensorboard at this batch
                            total_val_regression_loss += val_regression_value
                            total_val_classification_loss += val_classification_value
                            total_val_running_loss += val_loss_value

                            # Determine approximate time left
                            data_done = iter_num
                            data_left = num_val_batches - data_done
                            val_batch_time_left = datetime.timedelta(
                                seconds=data_left *
                                (time.time() - val_batch_prev_time))
                            val_batch_time_left = chop_microseconds(
                                val_batch_time_left)

                            batches_done = epoch_num * num_val_batches + (
                                epoch_num + 1) * num_train_batches + iter_num
                            batches_left = parser.epochs * (
                                num_train_batches +
                                num_val_batches) - batches_done
                            total_time_left = datetime.timedelta(
                                seconds=batches_left *
                                (time.time() - epoch_prev_time))
                            total_time_left = chop_microseconds(
                                total_time_left)

                            val_batch_prev_time = time.time()
                            epoch_prev_time = time.time()

                            # Print validation step log
                            prefix_log = 'Validation: [Epoch: {}/{}] | [Batch: {}/{}]'.format(
                                epoch_num + 1, parser.epochs, iter_num + 1,
                                num_val_batches)
                            suffix_log = '[Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}] ETA: {} / {}'.format(
                                val_classification_value,
                                val_regression_value, np.mean(val_loss_hist),
                                val_batch_time_left, total_time_left)

                            printProgressBar(iteration=iter_num + 1,
                                             total=num_val_batches,
                                             prefix=prefix_log,
                                             suffix=suffix_log)

                            del val_classification_loss
                            del val_regression_loss
                        except Exception as e:
                            print(e)
                            continue

            # Evaluate AP
            if parser.dataset_type == 'coco':

                print('Evaluating dataset')

                coco_eval.evaluate_coco_and_save_image(
                    dataset_val, retinanet,
                    os.path.join(save_images_dir, save_ckpt_name),
                    epoch_num + 1)

            elif parser.dataset_type == 'csv' and parser.csv_val is not None:

                print('Evaluating dataset')

                mAP = csv_eval.evaluate(dataset_val, retinanet)

            scheduler.step(np.mean(epoch_loss))

            # calculate loss average (max(..., 1) guards an empty loader)
            train_divisor = max(num_train_batches, 1)
            average_classification_loss = (total_classification_loss /
                                           train_divisor)
            average_regression_loss = total_regression_loss / train_divisor
            average_running_loss = total_running_loss / train_divisor

            # TensorBoard
            tb_writer.add_scalar(tag='Classification Loss',
                                 scalar_value=average_classification_loss,
                                 global_step=epoch_num + 1)
            tb_writer.add_scalar(tag='Regression Loss',
                                 scalar_value=average_regression_loss,
                                 global_step=epoch_num + 1)
            tb_writer.add_scalar(tag='Total Loss',
                                 scalar_value=average_running_loss,
                                 global_step=epoch_num + 1)

            # The validation sums were collected for TensorBoard but never
            # written; log them under their own tags.
            if num_val_batches > 0:
                tb_writer.add_scalar(
                    tag='Validation Classification Loss',
                    scalar_value=(total_val_classification_loss /
                                  num_val_batches),
                    global_step=epoch_num + 1)
                tb_writer.add_scalar(
                    tag='Validation Regression Loss',
                    scalar_value=total_val_regression_loss / num_val_batches,
                    global_step=epoch_num + 1)
                tb_writer.add_scalar(
                    tag='Validation Total Loss',
                    scalar_value=total_val_running_loss / num_val_batches,
                    global_step=epoch_num + 1)

            # Save model
            print("\nSave model to [%s] at %d epoch\n" %
                  (save_ckpt_name, epoch_num + 1))
            checkpoint_path = os.path.join(
                save_models_dir, "%s/RetinaNet_backbone(%s)_%d.pt" %
                (save_ckpt_name, backbone_name, epoch_num + 1))
            torch.save(retinanet.module, checkpoint_path)

        retinanet.eval()

        # NOTE(review): this stores the DataParallel wrapper, whereas the
        # per-epoch checkpoints store retinanet.module - confirm which form
        # the consumers of model_final.pt expect.
        torch.save(retinanet, 'model_final.pt')
    finally:
        # Flush and close the TensorBoard writer even if training aborts.
        tb_writer.close()