Example #1
def main(args=None):
    parser = argparse.ArgumentParser(description='Simple script for running RetinaNet inference and dumping detection scores.')

    parser.add_argument('--csv_classes', help='Path to file containing class list (see readme)')
    parser.add_argument('--csv_val', help='Path to file containing validation annotations (optional, see readme)')

    parser.add_argument('--model', help='Path to model (.pt) file.')

    parser = parser.parse_args(args)

    dataset_val = CSVDataset(train_file=parser.csv_val, class_list=parser.csv_classes, transform=transforms.Compose([Normalizer(), Resizer()]), is_visualizing=True)

    sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False)
    dataloader_val = DataLoader(dataset_val, num_workers=1, collate_fn=collater, batch_sampler=sampler_val)

    retinanet = model.resnet50(num_classes=dataset_val.num_classes(), pretrained=True)
    retinanet.load_state_dict(torch.load(parser.model))

    use_gpu = True

    if use_gpu:
        retinanet = retinanet.cuda()

    retinanet.eval()

    scores_for_rnn = {}

    for idx, data in enumerate(dataloader_val):
        print(idx)

        with torch.no_grad():
            img_name = data['img_name'][0]
            scale = data['scale'][0]
            scores, transformed_anchors = retinanet(data['img'].cuda().float(), return_all_scores=True)
            transformed_anchors /= scale
            scores, transformed_anchors = scores.cpu(), transformed_anchors.cpu()
            scores = scores.tolist()
            transformed_anchors = transformed_anchors.tolist()
            curr = {'scores': scores, 'bboxes': transformed_anchors}
            scores_for_rnn[img_name] = curr

    with open('detections.json', 'w') as f:
        json.dump(scores_for_rnn, f)
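
# A minimal sketch (not part of the original example): reading the dump back,
# assuming the {img_name: {'scores': [...], 'bboxes': [...]}} layout written
# by the loop above.
def load_detections(path='detections.json'):
    import json
    with open(path) as f:
        return json.load(f)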
Example #2
def main(args=None):

    parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')

    parser.add_argument('--dataset',
                        help='Dataset type, must be one of csv or coco.',
                        default="csv")
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument(
        '--csv_train',
        help='Path to file containing training annotations (see readme)')
    parser.add_argument('--csv_classes',
                        help='Path to file containing class list (see readme)',
                        default="binary_class.csv")
    parser.add_argument(
        '--csv_val',
        help=
        'Path to file containing validation annotations (optional, see readme)'
    )

    parser.add_argument(
        '--depth',
        help='Resnet depth, must be one of 18, 34, 50, 101, 152',
        type=int,
        default=18)
    parser.add_argument('--epochs',
                        help='Number of epochs',
                        type=int,
                        default=500)
    parser.add_argument('--epochs_only_det',
                        help='Number of epochs to train detection part',
                        type=int,
                        default=1)
    parser.add_argument('--max_epochs_no_improvement',
                        help='Max epochs without improvement',
                        type=int,
                        default=100)
    parser.add_argument('--pretrained_model',
                        help='Path of .pt file with pretrained model',
                        default='esposallescsv_retinanet_0.pt')
    parser.add_argument('--model_out',
                        help='Path of .pt file with trained model to save',
                        default='trained')

    parser.add_argument('--score_threshold',
                        help='Score above which boxes are kept',
                        type=float,
                        default=0.5)
    parser.add_argument('--nms_threshold',
                        help='IoU threshold for non-maximum suppression',
                        type=float,
                        default=0.2)
    parser.add_argument('--max_boxes',
                        help='Max boxes to be fed to recognition',
                        default=95)
    parser.add_argument('--seg_level',
                        help='[line, word], to choose anchor aspect ratio',
                        default='word')
    parser.add_argument(
        '--early_stop_crit',
        help='Early stop criterion, detection (map) or transcription (cer)',
        default='cer')
    parser.add_argument('--max_iters_epoch',
                        help='Max steps per epoch (for debugging)',
                        default=1000000)
    parser.add_argument('--train_htr',
                        help='Train recognition or not',
                        default='True')
    parser.add_argument('--train_det',
                        help='Train detection or not',
                        default='True')
    parser.add_argument(
        '--binary_classifier',
        help=
        'Whether to use the classification branch as binary (otherwise multiclass).',
        default='False')
    parser.add_argument(
        '--htr_gt_box',
        help='Train recognition branch with box gt (for debugging)',
        default='False')
    parser.add_argument(
        '--ner_branch',
        help='Train named entity recognition with separate branch',
        default='False')

    parser = parser.parse_args(args)

    if parser.dataset == 'csv':

        if parser.csv_train is None:
            raise ValueError('Must provide --csv_train')

        dataset_name = parser.csv_train.split("/")[-2]

        dataset_train = CSVDataset(train_file=parser.csv_train,
                                   class_list=parser.csv_classes,
                                   transform=transforms.Compose(
                                       [Normalizer(),
                                        Augmenter(),
                                        Resizer()]))

        if parser.csv_val is None:
            dataset_val = None
            print('No validation annotations provided.')
        else:
            dataset_val = CSVDataset(train_file=parser.csv_val,
                                     class_list=parser.csv_classes,
                                     transform=transforms.Compose(
                                         [Normalizer(),
                                          Resizer()]))

    else:
        raise ValueError(
            'Dataset type not understood (must be csv or coco), exiting.')

    # Files for training log

    experiment_id = str(time.time()).split('.')[0]
    valid_cer_f = open('trained_models/' + parser.model_out + 'log.txt', 'w')
    for arg in vars(parser):
        if getattr(parser, arg) is not None:
            valid_cer_f.write(
                str(arg) + ' ' + str(getattr(parser, arg)) + '\n')

    current_commit = subprocess.check_output(['git', 'rev-parse', 'HEAD'])
    valid_cer_f.write(current_commit.decode().strip() + '\n')

    valid_cer_f.write(
        "epoch_num   cer     best cer     mAP    best mAP     time\n")

    valid_cer_f.close()

    sampler = AspectRatioBasedSampler(dataset_train,
                                      batch_size=1,
                                      drop_last=False)
    dataloader_train = DataLoader(dataset_train,
                                  num_workers=3,
                                  collate_fn=collater,
                                  batch_sampler=sampler)

    if dataset_val is not None:
        sampler_val = AspectRatioBasedSampler(dataset_val,
                                              batch_size=1,
                                              drop_last=False)
        dataloader_val = DataLoader(dataset_val,
                                    num_workers=0,
                                    collate_fn=collater,
                                    batch_sampler=sampler_val)

    if not os.path.exists('trained_models'):
        os.mkdir('trained_models')

    # Create the model

    train_htr = parser.train_htr == 'True'
    htr_gt_box = parser.htr_gt_box == 'True'
    ner_branch = parser.ner_branch == 'True'
    binary_classifier = parser.binary_classifier == 'True'
    torch.backends.cudnn.benchmark = False

    alphabet = dataset_train.alphabet
    if os.path.exists(parser.pretrained_model):
        retinanet = torch.load(parser.pretrained_model)
        retinanet.classificationModel = ClassificationModel(
            num_features_in=256,
            num_anchors=retinanet.anchors.num_anchors,
            num_classes=dataset_train.num_classes())
        if ner_branch:
            retinanet.nerModel = NERModel(
                feature_size=256,
                pool_h=retinanet.pool_h,
                n_classes=dataset_train.num_classes(),
                pool_w=retinanet.pool_w)
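        # Note (added): loading the full pretrained model and then swapping in
        # fresh ClassificationModel/NERModel heads keeps the learned backbone
        # while re-initializing the heads for this dataset's class count.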
    else:
        if parser.depth == 18:
            retinanet = model.resnet18(num_classes=dataset_train.num_classes(),
                                       pretrained=True,
                                       max_boxes=int(parser.max_boxes),
                                       score_threshold=float(
                                           parser.score_threshold),
                                       seg_level=parser.seg_level,
                                       alphabet=alphabet,
                                       train_htr=train_htr,
                                       htr_gt_box=htr_gt_box,
                                       ner_branch=ner_branch,
                                       binary_classifier=binary_classifier)

        elif parser.depth == 34:

            retinanet = model.resnet34(num_classes=dataset_train.num_classes(),
                                       pretrained=True,
                                       max_boxes=int(parser.max_boxes),
                                       score_threshold=float(
                                           parser.score_threshold),
                                       seg_level=parser.seg_level,
                                       alphabet=alphabet,
                                       train_htr=train_htr,
                                       htr_gt_box=htr_gt_box)

        elif parser.depth == 50:
            retinanet = model.resnet50(num_classes=dataset_train.num_classes(),
                                       pretrained=True)
        elif parser.depth == 101:
            retinanet = model.resnet101(
                num_classes=dataset_train.num_classes(), pretrained=True)
        elif parser.depth == 152:
            retinanet = model.resnet152(
                num_classes=dataset_train.num_classes(), pretrained=True)
        else:
            raise ValueError(
                'Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    use_gpu = True
    train_htr = parser.train_htr == 'True'
    train_det = parser.train_det == 'True'
    retinanet.htr_gt_box = parser.htr_gt_box == 'True'

    retinanet.train_htr = train_htr
    retinanet.epochs_only_det = parser.epochs_only_det

    if use_gpu:
        retinanet = retinanet.cuda()

    retinanet = torch.nn.DataParallel(retinanet).cuda()

    retinanet.training = True

    optimizer = optim.Adam(retinanet.parameters(), lr=1e-4)

    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     patience=50,
                                                     verbose=True)

    loss_hist = collections.deque(maxlen=500)
    ctc = CTCLoss()
    retinanet.train()
    retinanet.module.freeze_bn()

    best_cer = 1000
    best_map = 0
    epochs_no_improvement = 0
    verbose_each = 20
    optimize_each = 1
    objective = 100
    best_objective = 10000
    # Defaults so the early-stop/logging code below cannot hit a NameError
    # before the first validation pass (e.g. when train_det is disabled).
    mAP, text_mAP, current_cer = 0., 0., best_cer

    print('Num training images: {}'.format(len(dataset_train)))

    for epoch_num in range(parser.epochs):
        cers = []

        retinanet.training = True

        retinanet.train()
        retinanet.module.freeze_bn()

        epoch_loss = []

        for iter_num, data in enumerate(dataloader_train):
            if iter_num > int(parser.max_iters_epoch): break
            try:
                if iter_num % optimize_each == 0:
                    optimizer.zero_grad()
                (classification_loss, regression_loss, ctc_loss,
                 ner_loss) = retinanet([
                     data['img'].cuda().float(), data['annot'], ctc, epoch_num
                 ])

                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()
                if train_det:

                    if train_htr:
                        loss = ctc_loss + classification_loss + regression_loss + ner_loss

                    else:
                        loss = classification_loss + regression_loss + ner_loss

                elif train_htr:
                    loss = ctc_loss

                else:
                    continue
                if bool(loss == 0):
                    continue
                loss.backward()
                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)
                if iter_num % verbose_each == 0:
                    print(
                        'Epoch: {} | Step: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | CTC loss: {:1.5f} | NER loss: {:1.5f} | Running loss: {:1.5f} | Total loss: {:1.5f}\r'
                        .format(epoch_num, iter_num,
                                float(classification_loss),
                                float(regression_loss), float(ctc_loss),
                                float(ner_loss), np.mean(loss_hist),
                                float(loss)))

                optimizer.step()

                loss_hist.append(float(loss))

                epoch_loss.append(float(loss))
                torch.cuda.empty_cache()

            except Exception as e:
                print(e)
                continue
        if parser.dataset == 'csv' and parser.csv_val is not None and train_det:

            print('Evaluating dataset')

            mAP, text_mAP, current_cer = csv_eval.evaluate(
                dataset_val, retinanet, score_threshold=parser.score_threshold)
            #text_mAP,_ = csv_eval_binary_map.evaluate(dataset_val, retinanet,score_threshold=parser.score_threshold)
            objective = current_cer * (1 - mAP)

        retinanet.eval()
        retinanet.training = False
        retinanet.score_threshold = float(parser.score_threshold)
        '''for idx,data in enumerate(dataloader_val):
            if idx>int(parser.max_iters_epoch): break
            print("Eval CER on validation set:",idx,"/",len(dataset_val),"\r")
            image_name = dataset_val.image_names[idx].split('/')[-1].split('.')[-2]

            #generate_pagexml(image_name,data,retinanet,parser.score_threshold,parser.nms_threshold,dataset_val)
            text_gt =".".join(dataset_val.image_names[idx].split('.')[:-1])+'.txt'
            f =open(text_gt,'r')
            text_gt_lines=f.readlines()[0]
            transcript_pred = get_transcript(image_name,data,retinanet,float(parser.score_threshold),float(parser.nms_threshold),dataset_val,alphabet)
            cers.append(float(editdistance.eval(transcript_pred,text_gt_lines))/len(text_gt_lines))'''

        t = str(time.time()).split('.')[0]

        valid_cer_f.close()
        #print("GT",text_gt_lines)
        #print("PREDS SAMPLE:",transcript_pred)

        if parser.early_stop_crit == 'cer':

            if float(objective) < float(
                    best_objective):  #float(current_cer)<float(best_cer):
                best_cer = current_cer
                best_objective = objective

                epochs_no_improvement = 0
                torch.save(
                    retinanet.module, 'trained_models/' + parser.model_out +
                    '{}_retinanet.pt'.format(parser.dataset))

            else:
                epochs_no_improvement += 1
            if mAP > best_map:
                best_map = mAP
        elif parser.early_stop_crit == 'map':
            if mAP > best_map:
                best_map = mAP
                epochs_no_improvement = 0
                torch.save(
                    retinanet.module, 'trained_models/' + parser.model_out +
                    '{}_retinanet.pt'.format(parser.dataset))

            else:
                epochs_no_improvement += 1
            if float(current_cer) < float(best_cer):
                best_cer = current_cer
        if train_det:
            print(epoch_num, "mAP: ", mAP, " best mAP", best_map)
        if train_htr:
            print("VALID CER:", current_cer, "best CER", best_cer)
        print("Epochs no improvement:", epochs_no_improvement)
        valid_cer_f = open('trained_models/' + parser.model_out + 'log.txt',
                           'a')
        valid_cer_f.write(
            str(epoch_num) + " " + str(current_cer) + " " + str(best_cer) +
            ' ' + str(mAP) + ' ' + str(best_map) + ' ' + str(text_mAP) + ' ' +
            t + '\n')
        if epochs_no_improvement > 3:
            for param_group in optimizer.param_groups:
                if param_group['lr'] > 10e-5:
                    param_group['lr'] *= 0.1

        if epochs_no_improvement >= parser.max_epochs_no_improvement:
            print("TRAINING FINISHED AT EPOCH", epoch_num, ".")
            sys.exit()

        scheduler.step(np.mean(epoch_loss))
        torch.cuda.empty_cache()

    retinanet.eval()
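
# Hedged illustration (not part of the original script): the combined
# early-stop objective computed above, objective = cer * (1 - mAP). It falls
# as either CER improves or mAP rises, giving a single lower-is-better value,
# e.g. early_stop_objective(0.12, 0.85) == 0.12 * 0.15 == 0.018.
def early_stop_objective(cer, mAP):
    return cer * (1.0 - mAP)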
Example #3
def main(args=None):

    parser     = argparse.ArgumentParser(description='Simple testing script for RetinaNet network.')

    parser.add_argument('--dataset', help='Dataset type, must be one of csv or coco.',default = "csv")
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument('--csv_classes', help='Path to file containing class list (see readme)',default="binary_class.csv")
    parser.add_argument('--csv_val', help='Path to file containing validation annotations (optional, see readme)')
    parser.add_argument('--csv_box_annot', help='Path to file containing predicted box annotations ')

    parser.add_argument('--depth', help='Resnet depth, must be one of 18, 34, 50, 101, 152', type=int, default=18)
    parser.add_argument('--epochs', help='Number of epochs', type=int, default=500)
    parser.add_argument('--model', help='Path of .pt file with trained model',default = 'esposallescsv_retinanet_0.pt')
    parser.add_argument('--model_out', help='Path of .pt file with trained model to save',default = 'trained')

    parser.add_argument('--score_threshold', help='Score above which boxes are kept',default=0.15)
    parser.add_argument('--nms_threshold', help='IoU threshold for non-maximum suppression',default=0.2)
    parser.add_argument('--max_epochs_no_improvement', help='Max epochs without improvement',default=100)
    parser.add_argument('--max_boxes', help='Max boxes to be fed to recognition',default=50)
    parser.add_argument('--seg_level', help='Line or word, to choose anchor aspect ratio',default='line')
    parser.add_argument('--htr_gt_box',help='Train recognition branch with box gt (for debugging)',default=False)
    parser = parser.parse_args(args)
    
    # Create the data loaders

    if parser.dataset == 'csv':


        if parser.csv_classes is None:
            raise ValueError('Must provide --csv_classes when using a csv dataset.')


        if parser.csv_val is None:
            dataset_val = None
            print('No validation annotations provided.')
        else:
            dataset_val = CSVDataset(train_file=parser.csv_val, class_list=parser.csv_classes, transform=transforms.Compose([Normalizer(), Resizer()]))


        if parser.csv_box_annot is not None:
            box_annot_data = CSVDataset(train_file=parser.csv_box_annot, class_list=parser.csv_classes, transform=transforms.Compose([Normalizer(), Resizer()]))

        else:    
            box_annot_data = None
    else:
        raise ValueError('Dataset type not understood (must be csv or coco), exiting.')

    
    if dataset_val is not None:
        sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False)
        dataloader_val = DataLoader(dataset_val, num_workers=0, collate_fn=collater, batch_sampler=sampler_val)

    if box_annot_data is not None:
        sampler_val = AspectRatioBasedSampler(box_annot_data, batch_size=1, drop_last=False)
        dataloader_box_annot = DataLoader(box_annot_data, num_workers=0, collate_fn=collater, batch_sampler=sampler_val)

    else:
        dataloader_box_annot = dataloader_val

    if not os.path.exists('trained_models'):
        os.mkdir('trained_models')

    # Create the model

    alphabet=dataset_val.alphabet
    if os.path.exists(parser.model):
        retinanet = torch.load(parser.model)
    else:
        if parser.depth == 18:
            retinanet = model.resnet18(num_classes=dataset_val.num_classes(), pretrained=True,max_boxes=int(parser.max_boxes),score_threshold=float(parser.score_threshold),seg_level=parser.seg_level,alphabet=alphabet)
        elif parser.depth == 34:
            retinanet = model.resnet34(num_classes=dataset_val.num_classes(), pretrained=True)
        elif parser.depth == 50:
            retinanet = model.resnet50(num_classes=dataset_val.num_classes(), pretrained=True)
        elif parser.depth == 101:
            retinanet = model.resnet101(num_classes=dataset_val.num_classes(), pretrained=True)
        elif parser.depth == 152:
            retinanet = model.resnet152(num_classes=dataset_val.num_classes(), pretrained=True)
        else:
            raise ValueError('Unsupported model depth, must be one of 18, 34, 50, 101, 152')        
    use_gpu = True

    if use_gpu:
        retinanet = retinanet.cuda()
    
    retinanet = torch.nn.DataParallel(retinanet).cuda()
    
    #retinanet = torch.load('../Documents/TRAINED_MODELS/pytorch-retinanet/esposallescsv_retinanet_99.pt')
    #print "LOADED pretrained MODEL\n\n"
    

    optimizer = optim.Adam(retinanet.parameters(), lr=1e-4)

    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=4, verbose=True)

    loss_hist = collections.deque(maxlen=500)
    ctc = CTCLoss()
    retinanet.module.freeze_bn()
    best_cer = 1000
    epochs_no_improvement=0
    
    cers=[]    
    retinanet.eval()
    retinanet.module.epochs_only_det = 0
    #retinanet.module.htr_gt_box = False
    
    retinanet.training=False    
    if parser.score_threshold is not None:
        retinanet.module.score_threshold = float(parser.score_threshold) 
    
    '''if parser.dataset == 'csv' and parser.csv_val is not None:

        print('Evaluating dataset')
    '''
    mAP = csv_eval.evaluate(dataset_val, retinanet,score_threshold=retinanet.module.score_threshold)
    aps = []
    for k,v in mAP.items():
        aps.append(v[0])
    print ("VALID mAP:",np.mean(aps))
            
    print("score th",retinanet.module.score_threshold)
    for idx,data in enumerate(dataloader_box_annot):
        print("Eval CER on validation set:",idx,"/",len(dataloader_box_annot),"\r")
        if box_annot_data:
            image_name = box_annot_data.image_names[idx].split('/')[-1].split('.')[-2]
        else:    
            image_name = dataset_val.image_names[idx].split('/')[-1].split('.')[-2]
        #generate_pagexml(image_name,data,retinanet,parser.score_threshold,parser.nms_threshold,dataset_val)
        text_gt_path="/".join(dataset_val.image_names[idx].split('/')[:-1])
        text_gt = os.path.join(text_gt_path,image_name+'.txt')
        f =open(text_gt,'r')
        text_gt_lines=f.readlines()[0]
        transcript_pred = get_transcript(image_name,data,retinanet,retinanet.module.score_threshold,float(parser.nms_threshold),dataset_val,alphabet)
        cers.append(float(editdistance.eval(transcript_pred,text_gt_lines))/len(text_gt_lines))
        print("GT",text_gt_lines)
        print("PREDS SAMPLE:",transcript_pred)
        print("VALID CER:",np.mean(cers),"best CER",best_cer)    
    print("GT",text_gt_lines)
    print("PREDS SAMPLE:",transcript_pred)
    print("VALID CER:",np.mean(cers),"best CER",best_cer)    
Example #4
def train(args):
    train_csv = args.train_csv
    test_csv = args.test_csv
    labels_csv = args.labels_csv
    model_type = args.model_type
    epochs = int(args.epochs)
    batch_size = int(args.batch_size)

    dataset_train = CSVDataset(train_file=train_csv,
                               class_list=labels_csv,
                               transform=transforms.Compose(
                                   [Normalizer(),
                                    Augmenter(),
                                    Resizer()]))
    dataset_val = CSVDataset(train_file=test_csv,
                             class_list=labels_csv,
                             transform=transforms.Compose(
                                 [Normalizer(), Resizer()]))

    sampler = AspectRatioBasedSampler(dataset_train,
                                      batch_size=batch_size,
                                      drop_last=False)
    dataloader_train = DataLoader(dataset_train,
                                  num_workers=3,
                                  collate_fn=collater,
                                  batch_sampler=sampler)

    retinanet = RetinaNet_efficientnet_b4(
        num_classes=dataset_train.num_classes(), model_type=model_type)

    use_gpu = True

    if use_gpu:
        retinanet = retinanet.cuda()

    retinanet = torch.nn.DataParallel(retinanet).cuda()

    retinanet.training = True

    optimizer = optim.Adam(retinanet.parameters(), lr=1e-5)

    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     patience=3,
                                                     verbose=True)

    loss_hist = collections.deque(maxlen=500)

    retinanet.train()
    retinanet.module.freeze_bn()

    print('Num training images: {}'.format(len(dataset_train)))

    for epoch_num in range(epochs):
        retinanet.train()
        retinanet.module.freeze_bn()
        epoch_loss = []
        for iter_num, data in enumerate(dataloader_train):
            try:
                optimizer.zero_grad()
                classification_loss, regression_loss = retinanet(
                    [data['img'].cuda().float(), data['annot']])
                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()
                loss = classification_loss + regression_loss
                if bool(loss == 0):
                    continue
                loss.backward()
                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)
                optimizer.step()
                loss_hist.append(float(loss))
                epoch_loss.append(float(loss))
                print(
                    'Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'
                    .format(epoch_num, iter_num, float(classification_loss),
                            float(regression_loss), np.mean(loss_hist)))
                del classification_loss
                del regression_loss
            except Exception as e:
                print(e)
                continue
        mAP, MAP = evaluate(dataset_val, retinanet)
        scheduler.step(np.mean(epoch_loss))
        torch.save(
            retinanet.module,
            '{}_retinanet_{}_map{}.pt'.format("EfficientNet" + model_type,
                                              epoch_num, MAP))
        retinanet.eval()
        torch.save(retinanet, 'model_final.pt')
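
# Hedged usage sketch (not part of the original): the argparse wiring train()
# appears to expect, inferred from the attributes it reads; these flag names
# are assumptions.
if __name__ == '__main__':
    import argparse
    p = argparse.ArgumentParser()
    p.add_argument('--train_csv')
    p.add_argument('--test_csv')
    p.add_argument('--labels_csv')
    p.add_argument('--model_type', default='b4')
    p.add_argument('--epochs', default=10)
    p.add_argument('--batch_size', default=2)
    train(p.parse_args())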
Example #5
def main(args=None):
    """
    In current implementation, if test csv is provided, we use that as validation set and combine the val and train csv's 
    as the csv for training.

    If train_all_labeled_data flag is use, then we combine all 3 (if test is provided) for training and use a prespecified learning rate step schedule.
    """

    parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')
    parser.add_argument(
        '--csv_train',
        help='Path to file containing training annotations (see readme)')
    parser.add_argument('--csv_classes',
                        help='Path to file containing class list (see readme)')
    parser.add_argument(
        '--csv_val',
        help=
        'Path to file containing validation annotations (optional, see readme)',
        default=None)
    parser.add_argument(
        '--csv_test',
        help=
        'Path to file containing test annotations (optional, if provided, train & val will be combined for training and test will be used for evaluation)',
        default=None)
    parser.add_argument('--lr', type=float, default=2e-5)
    parser.add_argument(
        '--depth',
        help='Resnet depth, must be one of 18, 34, 50, 101, 152',
        type=int,
        default=101)
    parser.add_argument('--epochs',
                        help='Number of epochs',
                        type=int,
                        default=25)
    parser.add_argument('--model_output_dir', type=str, default='models')
    parser.add_argument(
        '--train_all_labeled_data',
        help=
        'Combine train, val, and test into 1 training set. Will use prespecified learning rate scheduler steps',
        action='store_true')
    parser.add_argument('--resnet-backbone-normalization',
                        choices=['batch_norm', 'group_norm'],
                        type=str,
                        default='batch_norm')

    parser = parser.parse_args(args)

    print('Learning Rate: {}'.format(parser.lr))
    print("Normalization: ", parser.resnet_backbone_normalization)

    # Create folder - will raise an error if the folder already exists
    assert not os.path.exists(parser.model_output_dir)
    os.mkdir(parser.model_output_dir)

    if parser.csv_train is None:
        raise ValueError('Must provide --csv_train when training.')

    if parser.csv_classes is None:
        raise ValueError('Must provide --csv_classes when training.')

    if not parser.csv_val and parser.csv_test:
        raise ValueError(
            "Cannot specify test set without specifying validation set")

    if parser.train_all_labeled_data:
        csv_paths = [parser.csv_train, parser.csv_val, parser.csv_test]
        train_csv = []
        for path in csv_paths:
            if isinstance(path, str):
                train_csv.append(path)
        val_csv = None
    else:
        if parser.csv_train and parser.csv_val and parser.csv_test:
            train_csv = [parser.csv_train, parser.csv_val
                         ]  # Combine train and val sets for training
            val_csv = parser.csv_test
        else:
            train_csv = parser.csv_train
            val_csv = parser.csv_val

    print('loading train data')
    print(train_csv)
    dataset_train = CSVDataset(train_file=train_csv,
                               class_list=parser.csv_classes,
                               transform=transforms.Compose(
                                   [Normalizer(),
                                    Augmenter(),
                                    Resizer()]))
    print(len(dataset_train))

    if val_csv is None:
        dataset_val = None
        print('No validation annotations provided.')
    else:
        dataset_val = CSVDataset(train_file=val_csv,
                                 class_list=parser.csv_classes,
                                 transform=transforms.Compose(
                                     [Normalizer(), Resizer()]))

    print('putting data into loader')
    sampler = AspectRatioBasedSampler(dataset_train,
                                      batch_size=2,
                                      drop_last=False)
    dataloader_train = DataLoader(dataset_train,
                                  num_workers=3,
                                  collate_fn=collater,
                                  batch_sampler=sampler)

    if dataset_val is not None:
        sampler_val = AspectRatioBasedSampler(dataset_val,
                                              batch_size=1,
                                              drop_last=False)
        dataloader_val = DataLoader(dataset_val,
                                    num_workers=3,
                                    collate_fn=collater,
                                    batch_sampler=sampler_val)

    # Create the model
    print('creating model')
    if parser.depth == 18:
        retinanet = model.resnet18(
            num_classes=dataset_train.num_classes(),
            pretrained=True,
            normalization=parser.resnet_backbone_normalization)
    elif parser.depth == 34:
        retinanet = model.resnet34(
            num_classes=dataset_train.num_classes(),
            pretrained=True,
            normalization=parser.resnet_backbone_normalization)
    elif parser.depth == 50:
        retinanet = model.resnet50(
            num_classes=dataset_train.num_classes(),
            pretrained=True,
            normalization=parser.resnet_backbone_normalization)
    elif parser.depth == 101:
        retinanet = model.resnet101(
            num_classes=dataset_train.num_classes(),
            pretrained=True,
            normalization=parser.resnet_backbone_normalization)
    elif parser.depth == 152:
        retinanet = model.resnet152(
            num_classes=dataset_train.num_classes(),
            pretrained=True,
            normalization=parser.resnet_backbone_normalization)
    else:
        raise ValueError(
            'Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    use_gpu = True

    if use_gpu:
        retinanet = retinanet.cuda()

    retinanet = torch.nn.DataParallel(retinanet).cuda()

    retinanet.training = True

    optimizer = optim.Adam(retinanet.parameters(), lr=parser.lr)

    lr_factor = 0.3
    if not parser.train_all_labeled_data:
        scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                         patience=3,
                                                         factor=lr_factor,
                                                         verbose=True)
    else:
        # these milestones are for when using the lung masks - not for unmasked lung data
        scheduler = optim.lr_scheduler.MultiStepLR(
            optimizer, milestones=[12, 16, 20,
                                   24], gamma=lr_factor)  # masked training
        #scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[14, 18, 22, 26], gamma=lr_factor)

    loss_hist = collections.deque(maxlen=500)

    retinanet.train()
    retinanet.module.freeze_bn()

    #initialize tensorboard
    writer = SummaryWriter(comment=parser.model_output_dir)

    # Augmentation
    seq = iaa.Sequential([
        iaa.Fliplr(0.5),
        iaa.Flipud(0.5),
        iaa.Affine(scale={
            "x": (1.0, 1.2),
            "y": (1.0, 1.2)
        },
                   rotate=(-20, 20),
                   shear=(-4, 4))
    ],
                         random_order=True)

    def augment(data, seq):
        for n, img in enumerate(data['img']):
            # imgaug needs dim in format (H, W, C)
            image = data['img'][n].permute(1, 2, 0).numpy()

            bbs_array = []
            for ann in data['annot'][n]:
                x1, y1, x2, y2, _ = ann
                bbs_array.append(BoundingBox(x1=x1, y1=y1, x2=x2, y2=y2))

            bbs = BoundingBoxesOnImage(bbs_array, shape=image.shape)
            image_aug, bbs_aug = seq(image=image, bounding_boxes=bbs)

            # save augmented image and change dims to (C, H, W)
            data['img'][n] = torch.tensor(image_aug.copy()).permute(2, 0, 1)

            # save augmented annotations
            for i, bbox in enumerate(bbs_aug.bounding_boxes):
                x1, y1, x2, y2 = bbox.x1, bbox.y1, bbox.x2, bbox.y2
                obj_class = data['annot'][n][i][-1]
                data['annot'][n][i] = torch.tensor([x1, y1, x2, y2, obj_class])

        return data
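    # Note (added): augment() mutates the batch dict in place -- each image is
    # converted to HWC numpy for imgaug, the same Sequential is applied to the
    # image and its boxes, and the results are written back as CHW tensors and
    # updated [x1, y1, x2, y2, class] rows.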

    print('Num training images: {}'.format(len(dataset_train)))
    dir_training_images = os.path.join(os.getcwd(), writer.log_dir,
                                       'training_images')
    os.mkdir(dir_training_images)

    best_validation_loss = None
    best_validation_map = None

    for epoch_num in range(parser.epochs):

        writer.add_scalar('Train/LR', optimizer.param_groups[0]['lr'],
                          epoch_num)

        retinanet.train()
        retinanet.module.freeze_bn()

        epoch_loss = []

        for iter_num, data in enumerate(dataloader_train):
            try:
                optimizer.zero_grad()

                data = augment(data, seq)

                # save a few training images to see what augmentation looks like
                if iter_num % 100 == 0 and epoch_num == 0:
                    x1, y1, x2, y2, _ = data['annot'][0][0]

                    fig, ax = plt.subplots(1)
                    ax.imshow(data['img'][0][1])
                    rect = patches.Rectangle((x1, y1),
                                             x2 - x1,
                                             y2 - y1,
                                             linewidth=1,
                                             edgecolor='r',
                                             facecolor='none',
                                             alpha=1)
                    ax.add_patch(rect)
                    fig.savefig(
                        os.path.join(dir_training_images,
                                     '{}.png'.format(iter_num)))
                    plt.close()

                classification_loss, regression_loss = retinanet(
                    [data['img'].cuda().float(), data['annot']])

                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()

                loss = classification_loss + regression_loss

                if bool(loss == 0):
                    continue

                loss.backward()

                if parser.resnet_backbone_normalization == 'batch_norm':
                    torch.nn.utils.clip_grad_norm_(
                        parameters=retinanet.parameters(), max_norm=0.1)
                else:
                    torch.nn.utils.clip_grad_norm_(
                        parameters=retinanet.parameters(), max_norm=0.01
                    )  # Decrease norm to reduce risk of exploding gradients

                optimizer.step()

                loss_hist.append(float(loss))

                epoch_loss.append(float(loss))

                print(
                    'Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'
                    .format(epoch_num, iter_num, float(classification_loss),
                            float(regression_loss), np.mean(loss_hist)))

                del classification_loss
                del regression_loss
            except Exception as e:
                print(e)
                continue

        writer.add_scalar('Train/Loss', np.mean(epoch_loss), epoch_num)

        if not parser.train_all_labeled_data:
            print('Evaluating Validation Loss...')
            with torch.no_grad():
                retinanet.train()
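                # Note (added): the model is kept in train() mode here because
                # the forward pass only returns losses in training mode, while
                # torch.no_grad() prevents this pass from building gradients.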
                val_losses, val_class_losses, val_reg_losses = [], [], []
                for val_iter_num, val_data in enumerate(dataloader_val):
                    try:
                        val_classification_loss, val_regression_loss = retinanet(
                            [
                                val_data['img'].cuda().float(),
                                val_data['annot']
                            ])
                        val_losses.append(
                            float(val_classification_loss) +
                            float(val_regression_loss))
                        val_class_losses.append(float(val_classification_loss))
                        val_reg_losses.append(float(val_regression_loss))
                        del val_classification_loss, val_regression_loss
                    except Exception as e:
                        print(e)
                        continue
                print(
                    'VALIDATION Epoch: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Total loss: {:1.5f}'
                    .format(epoch_num, np.mean(val_class_losses),
                            np.mean(val_reg_losses), np.mean(val_losses)))

                # Save model with best validation loss
                if best_validation_loss is None:
                    best_validation_loss = np.mean(val_losses)
                if best_validation_loss >= np.mean(val_losses):
                    best_validation_loss = np.mean(val_losses)
                    torch.save(
                        retinanet.module,
                        parser.model_output_dir + '/best_result_valloss.pt')

                writer.add_scalar('Validation/Loss', np.mean(val_losses),
                                  epoch_num)

                # Calculate Validation mAP
                print('Evaluating validation mAP')
                mAP = csv_eval.evaluate(dataset_val, retinanet)
                print("Validation mAP: " + str(mAP[0][0]))
                if best_validation_map is None:
                    best_validation_map = mAP[0][0]
                elif best_validation_map < mAP[0][0]:
                    best_validation_map = mAP[0][0]
                    torch.save(
                        retinanet.module,
                        parser.model_output_dir + '/best_result_valmAP.pt')

                writer.add_scalar('Validation/mAP', mAP[0][0], epoch_num)

        if not parser.train_all_labeled_data:
            scheduler.step(np.mean(val_losses))
        else:
            scheduler.step()

        torch.save(
            retinanet.module,
            parser.model_output_dir + '/retinanet_{}.pt'.format(epoch_num))

    retinanet.eval()

    torch.save(retinanet, parser.model_output_dir + '/model_final.pt')
Example #6
def main(args=None):

    parser = argparse.ArgumentParser(
        description='Simple training script for training a CTracker network.')

    parser.add_argument('--dataset',
                        default='csv',
                        type=str,
                        help='Dataset type, must be one of csv or coco.')
    parser.add_argument('--model_dir',
                        default='./ctracker/',
                        type=str,
                        help='Path to save the model.')
    parser.add_argument(
        '--root_path',
        default='/Dataset/Tracking/MOT17/',
        type=str,
        help='Path of the directory containing both label and images')
    parser.add_argument(
        '--csv_train',
        default='train_annots.csv',
        type=str,
        help='Path to file containing training annotations (see readme)')
    parser.add_argument('--csv_classes',
                        default='train_labels.csv',
                        type=str,
                        help='Path to file containing class list (see readme)')

    parser.add_argument(
        '--depth',
        help='Resnet depth, must be one of 18, 34, 50, 101, 152',
        type=int,
        default=50)
    parser.add_argument('--epochs',
                        help='Number of epochs',
                        type=int,
                        default=100)
    parser.add_argument('--print_freq',
                        help='Print frequency',
                        type=int,
                        default=100)
    parser.add_argument(
        '--save_every',
        help='Save a checkpoint of model at given interval of epochs',
        type=int,
        default=5)

    parser = parser.parse_args(args)
    print(parser)

    print(parser.model_dir)
    if not os.path.exists(parser.model_dir):
        os.makedirs(parser.model_dir)

    # Create the data loaders
    if parser.dataset == 'csv':
        if (parser.csv_train is None) or (parser.csv_train == ''):
            raise ValueError('Must provide --csv_train when training on csv.')

        if (parser.csv_classes is None) or (parser.csv_classes == ''):
            raise ValueError(
                'Must provide --csv_classes when training on csv.')

        dataset_train = CSVDataset(
            parser.root_path,
            train_file=os.path.join(parser.root_path, parser.csv_train),
            class_list=os.path.join(parser.root_path, parser.csv_classes),
            transform=transforms.Compose([
                RandomSampleCrop(),
                PhotometricDistort(),
                Augmenter(),
                Normalizer()
            ]))
        # original alternative transform: transforms.Compose([Normalizer(), Augmenter(), Resizer()])

    else:
        raise ValueError(
            'Dataset type not understood (must be csv or coco), exiting.')

    # sampler = AspectRatioBasedSampler(dataset_train, batch_size=2, drop_last=False)
    sampler = AspectRatioBasedSampler(dataset_train,
                                      batch_size=8,
                                      drop_last=False)
    dataloader_train = DataLoader(dataset_train,
                                  num_workers=32,
                                  collate_fn=collater,
                                  batch_sampler=sampler)

    # Create the model
    if parser.depth == 18:
        retinanet = model.resnet18(num_classes=dataset_train.num_classes(),
                                   pretrained=True)
    elif parser.depth == 34:
        retinanet = model.resnet34(num_classes=dataset_train.num_classes(),
                                   pretrained=True)
    elif parser.depth == 50:
        retinanet = model.resnet50(num_classes=dataset_train.num_classes(),
                                   pretrained=True)
    elif parser.depth == 101:
        retinanet = model.resnet101(num_classes=dataset_train.num_classes(),
                                    pretrained=True)
    elif parser.depth == 152:
        retinanet = model.resnet152(num_classes=dataset_train.num_classes(),
                                    pretrained=True)
    else:
        raise ValueError(
            'Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    use_gpu = True

    if use_gpu:
        retinanet = retinanet.cuda()

    retinanet = torch.nn.DataParallel(retinanet).cuda()

    retinanet.training = True

    # optimizer = optim.Adam(retinanet.parameters(), lr=1e-5)
    optimizer = optim.Adam(retinanet.parameters(), lr=5e-5)

    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     patience=3,
                                                     verbose=True)

    loss_hist = collections.deque(maxlen=500)

    retinanet.train()
    retinanet.module.freeze_bn()

    print('Num training images: {}'.format(len(dataset_train)))
    total_iter = 0
    for epoch_num in range(parser.epochs):

        retinanet.train()
        retinanet.module.freeze_bn()

        epoch_loss = []

        for iter_num, data in enumerate(dataloader_train):
            try:
                total_iter = total_iter + 1
                optimizer.zero_grad()

                (classification_loss, regression_loss), reid_loss = retinanet([
                    data['img'].cuda().float(), data['annot'],
                    data['img_next'].cuda().float(), data['annot_next']
                ])
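                # Note (added): CTracker consumes adjacent frame pairs --
                # detection losses come from the current frame, while reid_loss
                # appears to score box associations between img and img_next.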

                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()
                reid_loss = reid_loss.mean()

                # loss = classification_loss + regression_loss + track_classification_losses
                loss = classification_loss + regression_loss + reid_loss

                if bool(loss == 0):
                    continue

                loss.backward()

                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)

                optimizer.step()

                loss_hist.append(float(loss))
                epoch_loss.append(float(loss))

                # print frequency default=100 or e.g. --print_freq 500
                if total_iter % parser.print_freq == 0:
                    print(
                        'Epoch: {} | Iter: {} | Cls loss: {:1.5f} | Reid loss: {:1.5f} | Reg loss: {:1.5f} | Running loss: {:1.5f}'
                        .format(epoch_num, iter_num,
                                float(classification_loss), float(reid_loss),
                                float(regression_loss), np.mean(loss_hist)))

            except Exception as e:
                print(e)
                continue

        scheduler.step(np.mean(epoch_loss))
        # Save a checkpoint of model at given interval of epochs e.g. --save_every 10
        if epoch_num % parser.save_every == 0:
            torch.save(
                retinanet,
                os.path.join(parser.model_dir,
                             "weights_epoch_" + str(epoch_num) + ".pt"))

    retinanet.eval()

    torch.save(retinanet, os.path.join(parser.model_dir, 'model_final.pt'))
    run_from_train(parser.model_dir, parser.root_path)
Example #7
def bbox_extraction(file_list='./data/images2.csv'):
    weights_path = './models/csv_retinanet_25.pt'
    csv_classes = './classes.csv'

    dataset_val = CSVDataset(train_file=file_list,
                             class_list=csv_classes,
                             transform=transforms.Compose(
                                 [Normalizer(), Resizer()]))
    # dataset_val = CSVDataset(train_file=file_list, class_list= csv_classes, transform=transforms.Compose([Normalizer()]))
    sampler_val = AspectRatioBasedSampler(dataset_val,
                                          batch_size=1,
                                          drop_last=False)
    dataloader_val = DataLoader(dataset_val,
                                num_workers=1,
                                collate_fn=collater,
                                batch_sampler=sampler_val)

    retinanet = model.resnet50(num_classes=dataset_val.num_classes(),
                               pretrained=False)
    retinanet.load_state_dict(torch.load(weights_path))

    use_gpu = True
    device = torch.device(
        "cuda" if use_gpu and torch.cuda.is_available() else "cpu")
    retinanet = retinanet.to(device)

    retinanet.eval()

    unnormalize = UnNormalizer()

    for idx, data in enumerate(dataloader_val):

        with torch.no_grad():
            scores, classification, transformed_anchors = retinanet(
                data['img'].to(device).float())

            def get_bbox(classification, transformed_anchors, label=0):
                bbox = {}
                idx = np.where(classification == label)[0][0]
                co_ord = transformed_anchors[idx, :]
                bbox['x1'] = int(co_ord[0])
                bbox['y1'] = int(co_ord[1])
                bbox['x2'] = int(co_ord[2])
                bbox['y2'] = int(co_ord[3])

                return bbox
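            # Note (added): get_bbox takes the first anchor whose predicted
            # class matches `label`; np.where(...)[0][0] raises IndexError if
            # that class was not detected, so each image is assumed to contain
            # both a neck and a stomach box.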

            scores = scores.cpu().numpy()
            classification = classification.cpu().numpy()
            transformed_anchors = transformed_anchors.cpu().numpy()
            # print('scores:',scores)
            # print('classification:', classification)
            # print('transformed_anchors', transformed_anchors)
            bbox = {}
            bbox['neck'] = get_bbox(classification,
                                    transformed_anchors,
                                    label=0)
            bbox['stomach'] = get_bbox(classification,
                                       transformed_anchors,
                                       label=1)

            # print('neck',bbox['neck'] )
            # print('stomach',bbox['stomach'] )

            img = np.array(255 * unnormalize(data['img'][0, :, :, :])).copy()
            img[img < 0] = 0
            img[img > 255] = 255

            img = np.transpose(img, (1, 2, 0))

            img = cv2.cvtColor(img.astype(np.uint8), cv2.COLOR_BGR2RGB)

            cv2.rectangle(img, (bbox['neck']['x1'], bbox['neck']['y1']),
                          (bbox['neck']['x2'], bbox['neck']['y2']),
                          color=(0, 0, 255),
                          thickness=2)
            cv2.rectangle(img, (bbox['stomach']['x1'], bbox['stomach']['y1']),
                          (bbox['stomach']['x2'], bbox['stomach']['y2']),
                          color=(0, 0, 255),
                          thickness=2)

            # cv2.imshow('img', img)
            # cv2.imwrite('./sample_11.jpg',img)
            # cv2.waitKey(0)

            return bbox


# bbox_extraction()

# if __name__ == '__main__':
#  main()
Example #8
def main(args=None):

    parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')

    parser.add_argument(
        '--csv_train',
        help='Path to file containing training annotations (see readme)')
    parser.add_argument('--csv_classes',
                        help='Path to file containing class list (see readme)')
    parser.add_argument(
        '--csv_val',
        help=
        'Path to file containing validation annotations (optional, see readme)'
    )

    parser.add_argument(
        '--depth',
        help='Resnet depth, must be one of 18, 34, 50, 101, 152',
        type=int,
        default=50)
    parser.add_argument('--epochs',
                        help='Number of epochs',
                        type=int,
                        default=50)

    parser.add_argument('--model_name', help='name of the model to save')
    parser.add_argument('--pretrained', help='pretrained model name')

    parser = parser.parse_args(args)

    # Create the data loaders
    dataset_train = CSVDataset(train_file=parser.csv_train,
                               class_list=parser.csv_classes,
                               transform=transforms.Compose(
                                   [Resizer(),
                                    Augmenter(),
                                    Normalizer()]))

    if parser.csv_val is None:
        dataset_val = None
        print('No validation annotations provided.')
    else:
        dataset_val = CSVDataset(train_file=parser.csv_val,
                                 class_list=parser.csv_classes,
                                 transform=transforms.Compose(
                                     [Resizer(), Normalizer()]))

    sampler = AspectRatioBasedSampler(dataset_train,
                                      batch_size=2,
                                      drop_last=False)
    dataloader_train = DataLoader(dataset_train,
                                  num_workers=16,
                                  collate_fn=collater,
                                  batch_sampler=sampler)
    #dataloader_train = DataLoader(dataset_train, num_workers=16, collate_fn=collater, batch_size=8, shuffle=True)

    if dataset_val is not None:
        sampler_val = AspectRatioBasedSampler(dataset_val,
                                              batch_size=2,
                                              drop_last=False)
        dataloader_val = DataLoader(dataset_val,
                                    num_workers=16,
                                    collate_fn=collater,
                                    batch_sampler=sampler_val)
        #dataloader_val = DataLoader(dataset_train, num_workers=16, collate_fn=collater, batch_size=8, shuffle=True)

    # Create the model_pose_level_attention
    if parser.depth == 18:
        retinanet = model.resnet18(num_classes=dataset_train.num_classes())
    elif parser.depth == 34:
        retinanet = model.resnet34(num_classes=dataset_train.num_classes())
    elif parser.depth == 50:
        retinanet = model.resnet50(num_classes=dataset_train.num_classes())
    elif parser.depth == 101:
        retinanet = model.resnet101(num_classes=dataset_train.num_classes())
    elif parser.depth == 152:
        retinanet = model.resnet152(num_classes=dataset_train.num_classes())
    else:
        raise ValueError(
            'Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    ckpt = False  # `ckpt` was undefined in the original; set True to resume from a full checkpoint
    if ckpt:
        retinanet = torch.load('')  # checkpoint path left blank in the original
        print('load ckpt')
    else:
        retinanet_dict = retinanet.state_dict()
        pretrained_dict = torch.load('./weight/' + parser.pretrained)
        pretrained_dict = {
            k: v
            for k, v in pretrained_dict.items() if k in retinanet_dict
        }
        retinanet_dict.update(pretrained_dict)
        retinanet.load_state_dict(retinanet_dict)
        print('load pretrained backbone')

    print(retinanet)
    retinanet = torch.nn.DataParallel(retinanet, device_ids=[0])
    retinanet.cuda()

    retinanet.training = True

    optimizer = optim.Adam(retinanet.parameters(), lr=1e-5)
    #optimizer = optim.SGD(retinanet.parameters(), lr=1e-3, momentum=0.9, weight_decay=1e-4)

    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     patience=3,
                                                     verbose=True)
    #scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.1)

    loss_hist = collections.deque(maxlen=500)

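    # BatchNorm stays frozen throughout training: with batches of only two
    # images the running statistics would be too noisy to update reliably.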
    retinanet.train()
    retinanet.module.freeze_bn()

    print('Num training images: {}'.format(len(dataset_train)))
    f_map = open('./mAP_txt/' + parser.model_name + '.txt', 'a')
    writer = SummaryWriter(log_dir='./summary')
    iters = 0
    for epoch_num in range(0, parser.epochs):

        retinanet.train()
        retinanet.module.freeze_bn()

        epoch_loss = []
        #scheduler.step()

        for iter_num, data in enumerate(dataloader_train):

            iters += 1

            optimizer.zero_grad()

            classification_loss_f, regression_loss_f, classification_loss_v, regression_loss_v = retinanet(
                [
                    data['img'].cuda().float(), data['annot'], data['vbox'],
                    data['ignore']
                ])

            classification_loss_f = classification_loss_f.mean()
            regression_loss_f = regression_loss_f.mean()
            classification_loss_v = classification_loss_v.mean()
            regression_loss_v = regression_loss_v.mean()

            loss = classification_loss_f + regression_loss_f + classification_loss_v + regression_loss_v

            if bool(loss == 0):
                continue

            loss.backward()

            torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)

            optimizer.step()

            loss_hist.append(float(loss))

            epoch_loss.append(float(loss))

            print(
                'Epoch: {} | Iteration: {} | Classification loss_f: {:1.5f} | Regression loss_f: {:1.5f} | Classification loss_v {:1.5f} | Regression loss_v {:1.5f} | Running loss: {:1.5f}'
                .format(epoch_num, iter_num, float(classification_loss_f),
                        float(regression_loss_f), float(classification_loss_v),
                        float(regression_loss_v), np.mean(loss_hist)))

            writer.add_scalar('classification_loss_f', classification_loss_f,
                              iters)
            writer.add_scalar('regression_loss_f', regression_loss_f, iters)
            writer.add_scalar('classification_loss_v', classification_loss_v,
                              iters)
            writer.add_scalar('regression_loss_v', regression_loss_v, iters)
            writer.add_scalar('loss', loss, iters)

        if parser.csv_val is not None:

            print('Evaluating dataset')

            mAP = csv_eval.evaluate(dataset_val, retinanet)
            f_map.write('mAP:{}, epoch:{}'.format(mAP[0][0], epoch_num))
            f_map.write('\n')

        scheduler.step(np.mean(epoch_loss))

        torch.save(retinanet.module,
                   './ckpt/' + parser.model_name + '_{}.pt'.format(epoch_num))

    retinanet.eval()

    writer.export_scalars_to_json(
        './summary/' + parser.model_name + '_all_scalars.json')
    f_map.close()
    writer.close()
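
A minimal sketch of how the script above could be launched programmatically; every file name below is a hypothetical placeholder:

if __name__ == '__main__':
    # Hypothetical paths: substitute your own annotation, class-list and
    # backbone weight files (the script reads weights from ./weight/).
    main(['--csv_train', 'train_annots.csv',
          '--csv_val', 'val_annots.csv',
          '--csv_classes', 'classes.csv',
          '--depth', '50',
          '--model_name', 'retinanet_demo',
          '--pretrained', 'resnet50_backbone.pt'])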
Example #9
def main(args=None):
    parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')

    parser.add_argument(
        '--wider_train',
        help='Path to file containing WIDER training annotations (see readme)')
    parser.add_argument(
        '--wider_val',
        help=
        'Path to file containing WIDER validation annotations (optional, see readme)'
    )
    parser.add_argument('--wider_train_prefix',
                        help='Prefix path to WIDER train images')
    parser.add_argument('--wider_val_prefix',
                        help='Prefix path to WIDER validation images')

    parser.add_argument(
        '--csv_train',
        help='Path to file containing training annotations (see readme)')
    parser.add_argument('--csv_classes',
                        help='Path to file containing class list (see readme)')
    parser.add_argument(
        '--csv_val',
        help=
        'Path to file containing validation annotations (optional, see readme)'
    )

    parser.add_argument(
        '--depth',
        help='Resnet depth, must be one of 18, 34, 50, 101, 152',
        type=int,
        default=50)
    parser.add_argument('--epochs',
                        help='Number of epochs',
                        type=int,
                        default=50)
    parser.add_argument('--batch_size',
                        help='Batch size (default 2)',
                        type=int,
                        default=2)

    parser.add_argument('--model_name', help='Name of the model to save')
    parser.add_argument('--parallel',
                        help='Run training with DataParallel',
                        dest='parallel',
                        default=False,
                        action='store_true')
    parser.add_argument('--pretrained',
                        help='Pretrained model name in weight directory')
    parser.add_argument('--ckpt',
                        help='Path to a checkpoint (.pt) to resume from')

    parser = parser.parse_args(args)
    create_dirs()
    is_cuda = torch.cuda.is_available()

    # Create the data loaders
    if parser.wider_train is None:
        dataset_train = CSVDataset(train_file=parser.csv_train,
                                   class_list=parser.csv_classes,
                                   transform=transforms.Compose(
                                       [Resizer(),
                                        Augmenter(),
                                        Normalizer()]))
    else:
        dataset_train = WIDERDataset(train_file=parser.wider_train,
                                     img_prefix=parser.wider_train_prefix,
                                     transform=transforms.Compose([
                                         Resizer(),
                                         Augmenter(),
                                         Normalizer()
                                     ]))

    if parser.wider_val is None:
        if parser.csv_val is None:
            dataset_val = None
            print('No validation annotations provided.')
        else:
            print('Loading CSV validation dataset')
            dataset_val = CSVDataset(train_file=parser.csv_val,
                                     class_list=parser.csv_classes,
                                     transform=transforms.Compose(
                                         [Resizer(), Normalizer()]))
    else:
        print('Loading WIDER validation dataset')
        dataset_val = WIDERDataset(train_file=parser.wider_val,
                                   img_prefix=parser.wider_val_prefix,
                                   transform=transforms.Compose(
                                       [Resizer(), Normalizer()]))

    print('Loading training dataset')
    sampler = AspectRatioBasedSampler(dataset_train,
                                      batch_size=parser.batch_size,
                                      drop_last=False)
    if parser.parallel:
        dataloader_train = DataLoader(dataset_train,
                                      num_workers=16,
                                      collate_fn=collater,
                                      batch_sampler=sampler)
    else:
        dataloader_train = DataLoader(dataset_train,
                                      collate_fn=collater,
                                      batch_sampler=sampler)

    # Create the model_pose_level_attention
    if parser.depth == 18:
        retinanet = resnet18(num_classes=dataset_train.num_classes())
    elif parser.depth == 34:
        retinanet = resnet34(num_classes=dataset_train.num_classes())
    elif parser.depth == 50:
        retinanet = resnet50(num_classes=dataset_train.num_classes())
    elif parser.depth == 101:
        retinanet = resnet101(num_classes=dataset_train.num_classes())
    elif parser.depth == 152:
        retinanet = resnet152(num_classes=dataset_train.num_classes())
    else:
        raise ValueError(
            'Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    if parser.ckpt:
        retinanet = torch.load(parser.ckpt)
        print('Loading checkpoint')
    else:
        print('Loading pretrained model')
        retinanet_dict = retinanet.state_dict()
        if parser.pretrained is None:
            pretrained_dict = model_zoo.load_url(model_urls['resnet' +
                                                            str(parser.depth)])
        else:
            pretrained_dict = torch.load('./weight/' + parser.pretrained)
        pretrained_dict = {
            k: v
            for k, v in pretrained_dict.items() if k in retinanet_dict
        }
        retinanet_dict.update(pretrained_dict)
        retinanet.load_state_dict(retinanet_dict)
        print('load pretrained backbone')

    print(retinanet)
    if parser.parallel:
        retinanet = torch.nn.DataParallel(retinanet, device_ids=[0])
    if is_cuda:
        retinanet.cuda()

    retinanet.training = True

    optimizer = optim.Adam(retinanet.parameters(), lr=1e-5)
    # optimizer = optim.SGD(retinanet.parameters(), lr=1e-3, momentum=0.9, weight_decay=1e-4)

    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     patience=3,
                                                     verbose=True)
    # scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.1)

    loss_hist = collections.deque(maxlen=500)

    retinanet.train()
    if parser.parallel:
        retinanet.module.freeze_bn()
    else:
        retinanet.freeze_bn()

    print('Num training images: {}'.format(len(dataset_train)))
    f_map = open('./mAP_txt/' + parser.model_name + '.txt', 'a')
    writer = SummaryWriter(log_dir='./summary')
    iters = 0
    for epoch_num in range(0, parser.epochs):

        retinanet.train()
        if parser.parallel:
            retinanet.module.freeze_bn()
        else:
            retinanet.freeze_bn()

        epoch_loss = []

        for iter_num, data in enumerate(dataloader_train):

            iters += 1

            optimizer.zero_grad()

            img_data = data['img'].float()
            annot_data = data['annot']
            if is_cuda:
                img_data = img_data.cuda()
                annot_data = annot_data.cuda()

            classification_loss, regression_loss, mask_loss = retinanet(
                [img_data, annot_data])

            classification_loss = classification_loss.mean()
            regression_loss = regression_loss.mean()
            mask_loss = mask_loss.mean()

            loss = classification_loss + regression_loss + mask_loss

            if bool(loss == 0):
                continue

            loss.backward()

            torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)

            optimizer.step()

            loss_hist.append(float(loss))

            epoch_loss.append(float(loss))

            print(
                'Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | '
                'mask_loss {:1.5f} | Running loss: {:1.5f}'.format(
                    epoch_num, iter_num, float(classification_loss),
                    float(regression_loss), float(mask_loss),
                    np.mean(loss_hist)))

            writer.add_scalar('classification_loss',
                              float(classification_loss), iters)
            writer.add_scalar('regression_loss', float(regression_loss), iters)
            writer.add_scalar('loss', float(loss), iters)

            del classification_loss
            del regression_loss
            del loss

        if parser.wider_val is not None:
            print('Evaluating dataset')

            mAP = evaluate(dataset_val, retinanet, is_cuda=is_cuda)
            f_map.write('mAP:{}, epoch:{}'.format(mAP[0][0], epoch_num))
            f_map.write('\n')

        scheduler.step(np.mean(epoch_loss))

        if parser.parallel:
            torch.save(
                retinanet.module,
                './ckpt/' + parser.model_name + '_{}.pt'.format(epoch_num))
        else:
            torch.save(
                retinanet,
                './ckpt/' + parser.model_name + '_{}.pt'.format(epoch_num))

    retinanet.eval()

    writer.export_scalars_to_json(
        './summary/' + parser.model_name + '_all_scalars.json')
    f_map.close()
    writer.close()
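
The backbone-loading idiom used in both scripts above can be isolated into a small helper; a minimal sketch, assuming `model` is any torch.nn.Module and `weights_path` (a hypothetical name) points at a plain state_dict file:

import torch

def load_matching_weights(model, weights_path):
    # Keep only entries whose names and shapes match the target model, so a
    # detector can reuse weights from a classification-only checkpoint.
    model_dict = model.state_dict()
    pretrained_dict = torch.load(weights_path)
    pretrained_dict = {
        k: v
        for k, v in pretrained_dict.items()
        if k in model_dict and v.shape == model_dict[k].shape
    }
    model_dict.update(pretrained_dict)
    model.load_state_dict(model_dict)
    return model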
Example #10
def main(args=None):

    parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')

    parser.add_argument('--dataset',
                        help='Dataset type, must be one of csv or coco.')
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument(
        '--csv_train',
        help='Path to file containing training annotations (see readme)')
    parser.add_argument('--csv_classes_general',
                        help='Path to file containing class list (see readme)')
    parser.add_argument('--csv_features',
                        help='Path to dir containing features csv files')
    parser.add_argument('--csv_colors',
                        help='Path to file containing color classes')
    parser.add_argument('--csv_types',
                        help='Path to file containing type classes')
    parser.add_argument(
        '--csv_val',
        help=
        'Path to file containing validation annotations (optional, see readme)'
    )
    parser.add_argument(
        '--image_dir',
        help='Path to file containing images (optional, see readme)')
    parser.add_argument('--pretrain_model', help='Path to model (.pt) file.')
    parser.add_argument(
        '--depth',
        help='Resnet depth, must be one of 18, 34, 50, 101, 152',
        type=int,
        default=50)
    parser.add_argument('--epochs',
                        help='Number of epochs',
                        type=int,
                        default=100)

    parser = parser.parse_args(args)

    # Create the data loaders

    if parser.dataset == 'csv':

        if parser.csv_train is None:
            raise ValueError('Must provide --csv_train when training on CSV')

        if parser.csv_classes_general is None:
            raise ValueError(
                'Must provide --csv_classes_general when training on CSV')

        dataset_train = CSVDataset(train_file=parser.csv_train,
                                   class_list=parser.csv_classes_general,
                                   color_classes=parser.csv_colors,
                                   type_classes=parser.csv_types,
                                   feature_class_dir=parser.csv_features,
                                   image_dir=parser.image_dir,
                                   transform=transforms.Compose(
                                       [Normalizer(),
                                        Augmenter(),
                                        Resizer()]))

        if parser.csv_val is None:
            dataset_val = None
            print('No validation annotations provided.')
        else:
            dataset_val = CSVDataset(train_file=parser.csv_val,
                                     class_list=parser.csv_classes_general,
                                     transform=transforms.Compose(
                                         [Normalizer(),
                                          Resizer()]))

    else:
        raise ValueError(
            'Dataset type not understood (must be csv or coco), exiting.')

    sampler = AspectRatioBasedSampler(dataset_train,
                                      batch_size=2,
                                      drop_last=False)
    dataloader_train = DataLoader(dataset_train,
                                  num_workers=3,
                                  collate_fn=collater,
                                  batch_sampler=sampler)

    if dataset_val is not None:
        sampler_val = AspectRatioBasedSampler(dataset_val,
                                              batch_size=1,
                                              drop_last=False)
        dataloader_val = DataLoader(dataset_val,
                                    num_workers=3,
                                    collate_fn=collater,
                                    batch_sampler=sampler_val)

    # Create the model
    if parser.depth == 18:
        retinanet = model.resnet18(num_classes=dataset_train.num_classes(),
                                   pretrained=True)
    elif parser.depth == 34:
        retinanet = model.resnet34(num_classes=dataset_train.num_classes(),
                                   pretrained=True)
    elif parser.depth == 50:
        retinanet = model.resnet50(num_classes=dataset_train.num_classes(),
                                   pretrained=True)
    elif parser.depth == 101:
        retinanet = model.resnet101(num_classes=dataset_train.num_classes(),
                                    pretrained=True)
    elif parser.depth == 152:
        retinanet = model.resnet152(num_classes=dataset_train.num_classes(),
                                    pretrained=True)
    else:
        raise ValueError(
            'Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    use_gpu = True

    if use_gpu:
        # retinanet = nn.DataParallel(retinanet)
        # torch.cuda.set_device(0)
        retinanet = retinanet.cuda()

    if parser.pretrain_model is not None:
        retinanet = torch.load(parser.pretrain_model)
        print('load model: ' + str(parser.pretrain_model))

    retinanet = torch.nn.DataParallel(retinanet).cuda()

    retinanet.training = True

    optimizer = optim.Adam(retinanet.parameters(), lr=1e-5)

    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     patience=3,
                                                     verbose=True)

    loss_hist = collections.deque(maxlen=500)

    retinanet.train()
    retinanet.module.freeze_bn()

    print('Num training images: {}'.format(len(dataset_train)))

    for epoch_num in range(parser.epochs):

        retinanet.train()
        retinanet.module.freeze_bn()

        epoch_loss = []

        for iter_num, data in enumerate(dataloader_train):
            try:
                optimizer.zero_grad()

                classification_loss, regression_loss = retinanet(
                    [data['img'].cuda().float(), data['annot']])

                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()

                loss = classification_loss + regression_loss

                if bool(loss == 0):
                    continue

                loss.backward()

                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)

                optimizer.step()

                loss_hist.append(float(loss))

                epoch_loss.append(float(loss))

                print(
                    'Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'
                    .format(epoch_num, iter_num, float(classification_loss),
                            float(regression_loss), np.mean(loss_hist)))

                del classification_loss
                del regression_loss
            except Exception as e:
                print(e)
                continue

        if parser.dataset == 'coco':

            print('Evaluating dataset')

            coco_eval.evaluate_coco(dataset_val, retinanet)

        elif parser.dataset == 'csv' and parser.csv_val is not None:

            print('Evaluating dataset')

            mAP = csv_eval.evaluate(dataset_val, retinanet)

        scheduler.step(np.mean(epoch_loss))

        torch.save(retinanet.module,
                   '{}_retinanet_{}.pt'.format(parser.dataset, epoch_num))

    retinanet.eval()

    torch.save(retinanet, 'model_final.pt')
def train(csv_train=None, csv_classes=None, csv_val=None, epochs=12, depth=50, batch_size=2):

	dataset = "csv"

	# Create the data loaders
	if dataset == 'csv':

		if csv_train is None:
			raise ValueError('Must provide csv_train when training on CSV')

		if csv_classes is None:
			raise ValueError('Must provide csv_classes when training on CSV')


		dataset_train = CSVDataset(train_file=csv_train, class_list=csv_classes, transform=transforms.Compose([RandomHorizontalFlip(0.3),RandomRotation(6),Gamma_Correction(0.2), Image_Noise(0.2), Blur(0.2) , Normalizer(), Augmenter(), Resizer()]))

		if csv_val is None:
			dataset_val = None
			print('No validation annotations provided.')
		else:
			dataset_val = CSVDataset(train_file=csv_val, class_list=csv_classes, transform=transforms.Compose([Normalizer(), Resizer()]))

	else:
		raise ValueError('Dataset type not understood (must be csv or coco), exiting.')

	sampler = AspectRatioBasedSampler(dataset_train, batch_size=batch_size, drop_last=False)
	dataloader_train = DataLoader(dataset_train, num_workers=3, collate_fn=collater, batch_sampler=sampler)

	if dataset_val is not None:
		sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False)
		dataloader_val = DataLoader(dataset_val, num_workers=3, collate_fn=collater, batch_sampler=sampler_val)

	# Create the model
	if depth == 18:
		retinanet = model.resnet18(num_classes=dataset_train.num_classes(), pretrained=True)
	elif depth == 34:
		retinanet = model.resnet34(num_classes=dataset_train.num_classes(), pretrained=True)
	elif depth == 50:
		retinanet = model.resnet50(num_classes=dataset_train.num_classes(), pretrained=True)
	elif depth == 101:
		retinanet = model.resnet101(num_classes=dataset_train.num_classes(), pretrained=True)
	elif depth == 152:
		retinanet = model.resnet152(num_classes=dataset_train.num_classes(), pretrained=True)
	else:
		raise ValueError('Unsupported model depth, must be one of 18, 34, 50, 101, 152')		

	use_gpu = True

	if use_gpu:
		retinanet = retinanet.cuda()
	
	retinanet = torch.nn.DataParallel(retinanet).cuda()

	retinanet.training = True

	optimizer = optim.Adam(retinanet.parameters(), lr=1e-5)

	scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, verbose=True)

	loss_hist = collections.deque(maxlen=500)

	retinanet.train()
	retinanet.module.freeze_bn()

	print('Num training images: {}'.format(len(dataset_train)))

	# Change
	total_loss_data = []
	class_loss_data = []
	reg_loss_data = []
	# Change

	for epoch_num in range(epochs):

		retinanet.train()
		retinanet.module.freeze_bn()


		epoch_loss = []

		# Change
		epoch_reg_loss = []
		epoch_class_loss = []
		# Change


		for iter_num, data in enumerate(dataloader_train):
			try:
				optimizer.zero_grad()

				classification_loss, regression_loss = retinanet([data['img'].cuda().float(), data['annot']])

				classification_loss = classification_loss.mean()
				regression_loss = regression_loss.mean()

				loss = classification_loss + regression_loss
				
				if bool(loss == 0):
					continue

				loss.backward()

				torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)

				optimizer.step()

				loss_hist.append(float(loss))

				epoch_loss.append(float(loss))

				# Change
				epoch_reg_loss.append(float(regression_loss))
				epoch_class_loss.append(float(classification_loss))
				# Change

				print('Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'.format(epoch_num, iter_num, float(classification_loss), float(regression_loss), np.mean(loss_hist)))
				
				del classification_loss
				del regression_loss
			except Exception as e:
				print(e)
				continue


		if dataset == 'csv' and csv_val is not None:

			print('Evaluating dataset')

			mAP = csv_eval.evaluate(dataset_val, retinanet)

		# Change
		total_loss_data.append(np.mean(epoch_loss))
		class_loss_data.append(np.mean(epoch_class_loss))
		reg_loss_data.append(np.mean(epoch_reg_loss))
		print("Epoch loss", total_loss_data)
		print("Epoch loss - classification", class_loss_data)
		print("Epoch loss - Regression", reg_loss_data)
		# Change
		scheduler.step(np.mean(epoch_loss))	

		torch.save(retinanet.module, '{}_retinanet_{}.pt'.format(dataset, epoch_num))

	retinanet.eval()

	torch.save(retinanet, 'model_final.pt')

	# Change
	import matplotlib.pyplot as plt
	plt.plot(total_loss_data, label='Total loss')
	plt.plot(class_loss_data, label='Classification loss')
	plt.plot(reg_loss_data, label='Regression loss')
	plt.ylabel("Loss")
	plt.xlabel("Epoch")
	plt.title("Epoch losses")
	plt.legend()
	plt.show()
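
A hedged usage sketch for the train() helper above; the CSV paths are hypothetical placeholders:

# Hypothetical annotation files in the CSV format the dataset expects.
train(csv_train='train_annots.csv',
      csv_classes='classes.csv',
      csv_val='val_annots.csv',
      epochs=12,
      depth=50,
      batch_size=2)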
Example #12
def main(args=None):

    parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')

    parser.add_argument(
        '--train-file',
        help='Path to file containing training annotations (see readme)')
    parser.add_argument('--classes-file',
                        help='Path to file containing class list (see readme)')
    parser.add_argument(
        '--val-file',
        help=
        'Path to file containing validation annotations (optional, see readme)'
    )

    parser.add_argument(
        '--depth',
        help='Resnet depth, must be one of 18, 34, 50, 101, 152',
        type=int,
        default=50)
    parser.add_argument('--epochs',
                        help='Number of epochs',
                        type=int,
                        default=100)
    parser.add_argument('--title', type=str, default='')
    parser.add_argument("--resume_model", type=str, default="")
    parser.add_argument("--resume_epoch", type=int, default=0)
    parser.add_argument("--reinit-classifier",
                        action="store_true",
                        default=False)
    parser.add_argument("--lr", type=float, default=.00001)
    parser.add_argument("--all-box-regression",
                        action="store_true",
                        default=False)
    parser.add_argument("--batch-size", type=int, default=16)

    parser = parser.parse_args(args)

    log_dir = "./runs/" + parser.title
    writer = SummaryWriter(log_dir)

    #pdb.set_trace()

    with open(log_dir + '/config.csv', 'w') as f:
        for item in vars(parser):
            print(item + ',' + str(getattr(parser, item)))
            f.write(item + ',' + str(getattr(parser, item)) + '\n')

    if not os.path.isdir(log_dir + "/checkpoints"):
        os.makedirs(log_dir + "/checkpoints")

    if not os.path.isdir(log_dir + '/map_files'):
        os.makedirs(log_dir + '/map_files')

    dataset_train = CSVDataset(train_file=parser.train_file,
                               class_list=parser.classes_file,
                               transform=transforms.Compose(
                                   [Normalizer(),
                                    Augmenter(),
                                    Resizer()]))

    if parser.val_file is None:
        dataset_val = None
        print('No validation annotations provided.')
    else:
        dataset_val = CSVDataset(train_file=parser.val_file,
                                 class_list=parser.classes_file,
                                 transform=transforms.Compose(
                                     [Normalizer(), Resizer()]))

    sampler = AspectRatioBasedSampler(dataset_train,
                                      batch_size=parser.batch_size,
                                      drop_last=True)
    dataloader_train = DataLoader(dataset_train,
                                  num_workers=8,
                                  collate_fn=collater,
                                  batch_sampler=sampler)

    if dataset_val is not None:
        sampler_val = AspectRatioBasedSampler(dataset_val,
                                              batch_size=parser.batch_size,
                                              drop_last=False)
        dataloader_val = DataLoader(dataset_val,
                                    num_workers=8,
                                    collate_fn=collater,
                                    batch_sampler=sampler_val)

    # Create the model
    if parser.depth == 18:
        retinanet = model.resnet18(num_classes=dataset_train.num_classes(),
                                   pretrained=True)
    elif parser.depth == 34:
        retinanet = model.resnet34(num_classes=dataset_train.num_classes(),
                                   pretrained=True)
    elif parser.depth == 50:
        retinanet = model.resnet50(num_classes=dataset_train.num_classes(),
                                   pretrained=True)
    elif parser.depth == 101:
        retinanet = model.resnet101(num_classes=dataset_train.num_classes(),
                                    pretrained=True)
    elif parser.depth == 152:
        retinanet = model.resnet152(num_classes=dataset_train.num_classes(),
                                    pretrained=True)
    else:
        raise ValueError(
            'Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    if parser.resume_model:
        x = torch.load(parser.resume_model)
        if parser.reinit_classifier:
            dummy = nn.Conv2d(256,
                              9 * dataset_train.num_classes(),
                              kernel_size=3,
                              padding=1)
            x['classificationModel.output.weight'] = dummy.weight.clone()
            x['classificationModel.output.bias'] = dummy.bias.clone()
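            # Focal-loss prior initialisation as in the RetinaNet paper: the
            # output bias is set to -log((1 - prior) / prior) so every anchor
            # initially predicts foreground with probability ~prior (0.01),
            # which keeps the classification loss stable early in training.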
            prior = 0.01
            x['classificationModel.output.weight'].data.fill_(0)
            x['classificationModel.output.bias'].data.fill_(-math.log(
                (1.0 - prior) / prior))
        retinanet.load_state_dict(x)

    use_gpu = True

    if use_gpu:
        retinanet = retinanet.cuda()

    retinanet = torch.nn.DataParallel(retinanet).cuda()
    #torch.nn.DataParallel(retinanet).cuda()

    retinanet.training = True

    optimizer = optim.Adam(retinanet.parameters(), lr=parser.lr)

    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     patience=3,
                                                     verbose=True)

    loss_hist = collections.deque(maxlen=500)

    retinanet.train()
    retinanet.module.freeze_bn()

    # x = torch.load('./csv_retinanet_20.pth')
    # retinanet.module.load_state_dict(x)

    print('Num training images: {}'.format(len(dataset_train)))

    for epoch_num in range(parser.resume_epoch, parser.epochs):

        retinanet.train()
        retinanet.module.freeze_bn()

        epoch_loss = []
        i = 0
        avg_class_loss = 0.0
        avg_reg_loss = 0.0

        for iter_num, data in enumerate(dataloader_train):
            i += 1

            try:
                optimizer.zero_grad()

                #pdb.set_trace()

                shape = data['img'].shape[2] * data['img'].shape[3]
                writer.add_scalar("train/image_shape", shape,
                                  epoch_num * (len(dataloader_train)) + i)

                classification_loss, regression_loss = retinanet(
                    [data['img'].cuda().float(), data['annot'].cuda().float()])

                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()

                avg_class_loss += classification_loss
                avg_reg_loss += regression_loss

                if i % 100 == 0:
                    writer.add_scalar("train/classification_loss",
                                      avg_class_loss / 100,
                                      epoch_num * (len(dataloader_train)) + i)
                    writer.add_scalar("train/regression_loss",
                                      avg_reg_loss / 100,
                                      epoch_num * (len(dataloader_train)) + i)
                    avg_class_loss = 0.0
                    avg_reg_loss = 0.0

                loss = classification_loss + regression_loss

                if bool(loss == 0):
                    continue

                loss.backward()

                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)

                optimizer.step()

                loss_hist.append(float(loss))

                epoch_loss.append(float(loss))

                print(
                    'Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'
                    .format(epoch_num, iter_num, float(classification_loss),
                            float(regression_loss), np.mean(loss_hist)))

                del classification_loss
                del regression_loss
            except Exception as e:
                print(e)
                continue

        if epoch_num % 2 == 0 and dataset_val is not None:

            print('Evaluating dataset')

            retinanet.eval()
            mAP, AP_string = csv_eval.evaluate(dataset_val,
                                               retinanet.module,
                                               score_threshold=0.1)
            with open(
                    log_dir + '/map_files/retinanet_{}.txt'.format(epoch_num),
                    'w') as f:
                f.write(AP_string)
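            # csv_eval is assumed to return mAP as {class: (AP, num_annotations)},
            # so the loop below computes an annotation-weighted mean AP as well
            # as a plain unweighted mean.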
            total = 0.0
            count = 0.0
            total_unweighted = 0.0
            for c in mAP:
                total += mAP[c][0] * mAP[c][1]
                total_unweighted += mAP[c][0]
                count += mAP[c][1]
            writer.add_scalar("val/mAP", total / count, epoch_num)
            writer.add_scalar("val/mAP_unweighted",
                              total_unweighted / len(mAP), epoch_num)

        scheduler.step(np.mean(epoch_loss))

        torch.save(retinanet.module.state_dict(),
                   log_dir + '/checkpoints/retinanet_{}.pth'.format(epoch_num))

    retinanet.eval()

    torch.save(retinanet.module.state_dict(),
               log_dir + '/checkpoints/model_final.pth')
Example #13
sampler = AspectRatioBasedSampler(dataset_train, batch_size=2, drop_last=False)
dataloader_train = DataLoader(dataset_train,
                              num_workers=3,
                              collate_fn=collater,
                              batch_sampler=sampler)

if dataset_val is not None:
    sampler_val = AspectRatioBasedSampler(dataset_val,
                                          batch_size=1,
                                          drop_last=False)
    dataloader_val = DataLoader(dataset_val,
                                num_workers=3,
                                collate_fn=collater,
                                batch_sampler=sampler_val)

retinanet = model.resnet50(num_classes=dataset_train.num_classes(),
                           pretrained=True)
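# state_dict is assumed to have been created earlier in this (truncated)
# snippet, e.g. state_dict = torch.load(weights_path); strict=False tolerates
# detection-head keys that are absent from a backbone-only checkpoint.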
retinanet.load_state_dict(state_dict, strict=False)

use_gpu = True

if use_gpu:
    retinanet = retinanet.cuda()

retinanet = torch.nn.DataParallel(retinanet).cuda()

retinanet.training = True

optimizer = optim.Adam(retinanet.parameters(), lr=1e-5)

scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                 patience=3,
                                                 verbose=True)
Example #14
def main(args=None):
    parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')

    parser.add_argument('--dataset',
                        help='Dataset type, must be one of csv or coco.')
    parser.add_argument('--csv_classes',
                        help='Path to file containing class list (see readme)')
    parser.add_argument(
        '--csv_val',
        help=
        'Path to file containing validation annotations (optional, see readme)'
    )

    parser.add_argument(
        '--depth',
        help='Resnet depth, must be one of 18, 34, 50, 101, 152',
        type=int,
        default=50)
    parser.add_argument('--epochs',
                        help='Number of epochs',
                        type=int,
                        default=100)
    parser.add_argument('--title', type=str, default='')
    parser.add_argument("--resume_model", type=str, default="")
    parser.add_argument("--resume_epoch", type=int, default=0)

    parser = parser.parse_args(args)

    title = os.path.splitext(os.path.basename(parser.resume_model))[0]

    log_dir = "./runs/" + title
    writer = SummaryWriter(log_dir)

    if not os.path.isdir(log_dir + "/checkpoints"):
        os.makedirs(log_dir + "/checkpoints")

    if not os.path.isdir(log_dir + '/map_files'):
        os.makedirs(log_dir + '/map_files')

    if parser.dataset == 'csv':

        if parser.csv_val is None:
            dataset_val = None
            print('No validation annotations provided.')
        else:
            dataset_val = CSVDataset(train_file=parser.csv_val,
                                     class_list=parser.csv_classes,
                                     transform=transforms.Compose(
                                         [Normalizer(),
                                          Resizer()]))
    else:
        raise ValueError(
            'Dataset type not understood (must be csv or coco), exiting.')

    if dataset_val is not None:
        sampler_val = AspectRatioBasedSampler(dataset_val,
                                              batch_size=1,
                                              drop_last=False)
        dataloader_val = DataLoader(dataset_val,
                                    num_workers=0,
                                    collate_fn=collater,
                                    batch_sampler=sampler_val)

    # Create the model
    if parser.depth == 18:
        retinanet = model.resnet18(num_classes=dataset_val.num_classes(),
                                   pretrained=True)
    elif parser.depth == 34:
        retinanet = model.resnet34(num_classes=dataset_val.num_classes(),
                                   pretrained=True)
    elif parser.depth == 50:
        retinanet = model.resnet50(num_classes=dataset_val.num_classes(),
                                   pretrained=True)
    elif parser.depth == 101:
        retinanet = model.resnet101(num_classes=dataset_val.num_classes(),
                                    pretrained=True)
    elif parser.depth == 152:
        retinanet = model.resnet152(num_classes=dataset_val.num_classes(),
                                    pretrained=True)
    else:
        raise ValueError(
            'Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    if parser.resume_model:
        retinanet.load_state_dict(torch.load(parser.resume_model))

    use_gpu = True

    if use_gpu:
        retinanet = retinanet.cuda()

    threshes = [.05, 0.1, 0.2, 0.3]

    for i, thresh in enumerate(threshes, start=1):

        retinanet.eval()

        print('Evaluating dataset')

        mAP, AP_string = csv_eval.evaluate(dataset_val,
                                           retinanet,
                                           score_threshold=thresh)
        with open(
                log_dir + '/map_files/{}_retinanet_{}.txt'.format(
                    parser.dataset, thresh), 'w') as f:
            f.write(AP_string)
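        # As above, mAP is assumed to map each class to (AP, num_annotations);
        # the aggregation weights each AP by how often the class occurs.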
        total = 0.0
        count = 0.0
        total_unweighted = 0.0
        for c in mAP:
            total += mAP[c][0] * mAP[c][1]
            total_unweighted += mAP[c][0]
            count += mAP[c][1]
        writer.add_scalar("thresh_finder/mAP", total / count, i)
        writer.add_scalar("thresh_finder/mAP_unweighted",
                          total_unweighted / len(mAP), i)
Example #15
def main(args=None):

    parser = argparse.ArgumentParser(
        description='Training a RetinaNet network.')
    parser.add_argument('--csv_train',
                        help='Path to file containing training annotations')
    parser.add_argument('--csv_classes',
                        help='Path to file containing class list')
    parser.add_argument('--csv_val',
                        help='Path to file containing validation \
                        annotations')
    parser.add_argument("--depth",
                        help='Resnet depth, must be one of \
                        18, 34, 50,101, 152',
                        type=int,
                        default=50)
    parser.add_argument('--epochs',
                        help='Number of epochs to run',
                        type=int,
                        default=100)
    parser.add_argument('--batch_size',
                        help='Number of training sample per batch',
                        type=int,
                        default=16)
    parser.add_argument('--score_thresh',
                        help='score threshold to discard \
                        background/reduce nms processing time',
                        type=float,
                        default=0.05)
    parser.add_argument("--iou_nms1",
                        help="iou for nms used during validation and \
                        inference",
                        type=float,
                        default=0.3)
    parser.add_argument('--lr', help='learning rate', type=float, default=6e-4)
    parser.add_argument('--pretrained', action='store_true', default=False)
    parser.add_argument('--logfile', required=True,
                        help='Path to the training log file')

    args = parser.parse_args(args)

    outputdir = os.path.dirname(args.logfile)
    if not os.path.isdir(outputdir): os.makedirs(outputdir)

    # Create the data loaders
    if args.csv_train is None:
        raise ValueError('Must provide --csv_train when training on CSV,')

    if args.csv_classes is None:
        raise ValueError('Must provide --csv_classes when training on CSV,')

    dataset_train = CSVDataset(train_file=args.csv_train,
                               class_list=args.csv_classes,
                               transform=transforms.Compose(
                                   [Normalizer(),
                                    Augmenter(),
                                    Resizer()]))

    if args.csv_val is None:
        dataset_val = None
        print('No validation annotations provided.')
    else:
        dataset_val = CSVDataset(train_file=args.csv_val,
                                 class_list=args.csv_classes,
                                 transform=transforms.Compose(
                                     [Normalizer(), Resizer()]))

    dataloader_train = DataLoader(dataset_train,
                                  batch_size=args.batch_size,
                                  num_workers=3,
                                  collate_fn=collater,
                                  shuffle=True)

    if dataset_val is not None:
        sampler_val = AspectRatioBasedSampler(dataset_val,
                                              batch_size=1,
                                              drop_last=False)
        dataloader_val = DataLoader(dataset_val,
                                    num_workers=3,
                                    collate_fn=collater,
                                    batch_sampler=sampler_val)

    # Create the model
    if args.depth == 18:
        retinanet = model.resnet18(num_classes=dataset_train.num_classes(),
                                   pretrained=args.pretrained)
    elif args.depth == 34:
        retinanet = model.resnet34(num_classes=dataset_train.num_classes(),
                                   pretrained=args.pretrained)
    elif args.depth == 50:
        retinanet = model.resnet50(num_classes=dataset_train.num_classes(),
                                   pretrained=args.pretrained)
    elif args.depth == 101:
        retinanet = model.resnet101(num_classes=dataset_train.num_classes(),
                                    pretrained=args.pretrained)
    elif args.depth == 152:
        retinanet = model.resnet152(num_classes=dataset_train.num_classes(),
                                    pretrained=args.pretrained)
    else:
        raise ValueError(
            'Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    retinanet = retinanet.cuda()
    retinanet = torch.nn.DataParallel(retinanet).cuda()
    retinanet.training = True
    retinanet.score_thresh = args.score_thresh
    retinanet.iou_nms1 = args.iou_nms1

    optimizer = optim.Adam(retinanet.parameters(), lr=args.lr)

    # # LR Finder
    # lr_finder = LRFinder(retinanet, optimizer, losses.FocalLossQ, device="cuda")
    # lr_finder.range_test(dataloader_train, end_lr=10, num_iter=1260, diverge_th=10)
    # lr_finder.plot(skip_start=0, skip_end=3, show_lr=3e-5)

    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     patience=3,
                                                     verbose=True)

    loss_hist = collections.deque(maxlen=500)
    print("Num training images: {}".format(len(dataset_train)))

    for epoch_num in range(args.epochs):

        retinanet.train()

        epoch_loss = []

        for iter_num, data in enumerate(dataloader_train):
            optimizer.zero_grad()

            classification_loss, regression_loss = retinanet(
                [data['img'].cuda().float(), data['annot']])

            classification_loss = classification_loss.mean()
            regression_loss = regression_loss.mean()

            loss = classification_loss + regression_loss

            if bool(loss == 0):
                continue

            loss.backward()

            torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)

            optimizer.step()

            loss_hist.append(float(loss))

            epoch_loss.append(float(loss))

            print('Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | '
                  'Regression loss: {:1.5f} | Running loss: {:1.5f}'.format(
                      epoch_num, iter_num, float(classification_loss),
                      float(regression_loss), np.mean(loss_hist)))

            del classification_loss
            del regression_loss

        if args.csv_val is not None:
            mAP = csv_eval.evaluate(dataset_val, retinanet)
            with open(args.logfile, mode='a') as f:
                f.write("mAP:\n")
                aps = []
                for i, label_name in enumerate(dataset_val.classes):
                    f.write('{}: {}| Count: {}\n'.format(
                        label_name, mAP[i][0], mAP[i][1]))
                    aps.append(mAP[i][0])
                f.write('mAP: {}\n'.format(np.mean(aps)))

        scheduler.step(np.mean(epoch_loss))

        torch.save(retinanet.module,
                   '{}/retinanet_{}.pt'.format(outputdir, epoch_num))
        torch.save(retinanet.module.state_dict(),
                   '{}/statedict_{}.pt'.format(outputdir, epoch_num))

    retinanet.eval()
Example #16
def main(args=None):

    parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')
    # add a bunch of arguments (customized by Yu Han Huang)
    parser.add_argument('--dataset',
                        help='Dataset type, must be one of csv or coco.')
    parser.add_argument(
        '--csv_train',
        help='Path to file containing training annotations (see readme)')
    parser.add_argument('--csv_classes',
                        help='Path to file containing class list (see readme)')
    parser.add_argument(
        '--csv_val',
        help=
        'Path to file containing validation annotations (optional, see readme)'
    )
    parser.add_argument('--model', default='None')
    parser.add_argument(
        '--depth',
        help='Resnet depth, must be one of 18, 34, 50, 101, 152',
        type=int,
        default=50)
    parser.add_argument('--resnext',
                        help='change backbone to resnext101',
                        action='store_true')
    parser.add_argument('--epochs',
                        help='Number of Epochs',
                        type=int,
                        default=12)
    parser.add_argument('--batch_size', help='Batch Size', type=int, default=4)
    parser.add_argument('--workers',
                        help='Number of Workers',
                        type=int,
                        default=4)
    parser.add_argument('--lr',
                        help='Learning Rate for training',
                        type=float,
                        default=1e-5)
    parser.add_argument(
        '--dropout1',
        help='Dropout Rate for layer dropout1 in ClassificationModel',
        type=float,
        default=0.25)
    parser.add_argument(
        '--dropout2',
        help='Dropout Rate for layer dropout2 in ClassificationModel',
        type=float,
        default=0.25)
    parser.add_argument(
        '--angle',
        help='Angle of pictures while implementing Data Augmentation',
        type=float,
        default=6)
    parser.add_argument('--size',
                        help='The length of the side of pictures',
                        type=int,
                        default=512)
    parser.add_argument(
        '--zoom_range',
        help=
        'Zoom range applied during data augmentation; pass two floats (min max)',
        nargs='+',
        type=float,
        default=[-0.1, 0.1])
    parser.add_argument('--alpha',
                        help='Alpha for focal loss',
                        type=float,
                        default=0.25)
    parser.add_argument('--gamma',
                        help='Gamma for focal loss',
                        type=float,
                        default=2)
    parser.add_argument('--loss_with_no_bboxes', action='store_true')
    parser.add_argument('--no_bboxes_alpha',
                        help='Alpha for focal loss',
                        type=float,
                        default=0.5)
    parser.add_argument('--no_bboxes_gamma',
                        help='Gamma for focal loss',
                        type=float,
                        default=2)

    parser = parser.parse_args(args)

    # Create the data loaders
    if parser.dataset == 'csv':

        if parser.csv_train is None:
            raise ValueError('Must provide --csv_train when training on CSV,')

        if parser.csv_classes is None:
            raise ValueError(
                'Must provide --csv_classes when training on CSV,')

        dataset_train = CSVDataset(train_file=parser.csv_train,
                                   class_list=parser.csv_classes,
                                   transform=transforms.Compose([
                                       Normalizer(),
                                       Augmenter(angle=parser.angle),
                                       Resizer(zoom_range=parser.zoom_range,
                                               side=parser.size)
                                   ]))

        if parser.csv_val is None:
            dataset_val = None
            print('No validation annotations provided.')
        else:
            dataset_val = CSVDataset(train_file=parser.csv_val,
                                     class_list=parser.csv_classes,
                                     transform=transforms.Compose(
                                         [Normalizer(),
                                          ValResizer()]))

    else:
        raise ValueError(
            'Dataset type not understood (must be csv or coco), exiting.')

    sampler = AspectRatioBasedSampler(dataset_train,
                                      batch_size=parser.batch_size,
                                      drop_last=False)
    dataloader_train = DataLoader(dataset_train,
                                  num_workers=parser.workers,
                                  collate_fn=collater,
                                  batch_sampler=sampler)

    if dataset_val is not None:
        sampler_val = AspectRatioBasedSampler(dataset_val,
                                              batch_size=1,
                                              drop_last=False)
        dataloader_val = DataLoader(dataset_val,
                                    num_workers=3,
                                    collate_fn=collater,
                                    batch_sampler=sampler_val)

    # Create the model
    # add arguments dropout1, dropout2, alpha, gamma, loss_with_no_bboxes, no_bboxes_alpha, no_bboxes_gamma (customized by Yu Han Huang)
    if not parser.resnext:
        if parser.depth == 18:
            retinanet = model.resnet18(
                num_classes=dataset_train.num_classes(),
                pretrained=True,
                dropout1=parser.dropout1,
                dropout2=parser.dropout2,
                alpha=parser.alpha,
                gamma=parser.gamma,
                loss_with_no_bboxes=parser.loss_with_no_bboxes,
                no_bboxes_alpha=parser.no_bboxes_alpha,
                no_bboxes_gamma=parser.no_bboxes_gamma)
        elif parser.depth == 34:
            retinanet = model.resnet34(
                num_classes=dataset_train.num_classes(),
                pretrained=True,
                dropout1=parser.dropout1,
                dropout2=parser.dropout2,
                alpha=parser.alpha,
                gamma=parser.gamma,
                loss_with_no_bboxes=parser.loss_with_no_bboxes,
                no_bboxes_alpha=parser.no_bboxes_alpha,
                no_bboxes_gamma=parser.no_bboxes_gamma)
        elif parser.depth == 50:
            retinanet = model.resnet50(
                num_classes=dataset_train.num_classes(),
                pretrained=True,
                dropout1=parser.dropout1,
                dropout2=parser.dropout2,
                alpha=parser.alpha,
                gamma=parser.gamma,
                loss_with_no_bboxes=parser.loss_with_no_bboxes,
                no_bboxes_alpha=parser.no_bboxes_alpha,
                no_bboxes_gamma=parser.no_bboxes_gamma)
        elif parser.depth == 101:
            retinanet = model.resnet101(
                num_classes=dataset_train.num_classes(),
                pretrained=True,
                dropout1=parser.dropout1,
                dropout2=parser.dropout2,
                alpha=parser.alpha,
                gamma=parser.gamma,
                loss_with_no_bboxes=parser.loss_with_no_bboxes,
                no_bboxes_alpha=parser.no_bboxes_alpha,
                no_bboxes_gamma=parser.no_bboxes_gamma)
        elif parser.depth == 152:
            retinanet = model.resnet152(
                num_classes=dataset_train.num_classes(),
                pretrained=True,
                dropout1=parser.dropout1,
                dropout2=parser.dropout2,
                alpha=parser.alpha,
                gamma=parser.gamma,
                loss_with_no_bboxes=parser.loss_with_no_bboxes,
                no_bboxes_alpha=parser.no_bboxes_alpha,
                no_bboxes_gamma=parser.no_bboxes_gamma)
        else:
            raise ValueError(
                'Unsupported model depth, must be one of 18, 34, 50, 101, 152')
    else:
        if parser.depth == 101:
            retinanet = model.resnext101(
                num_classes=dataset_train.num_classes(),
                pretrained=True,
                dropout1=parser.dropout1,
                dropout2=parser.dropout2,
                alpha=parser.alpha,
                gamma=parser.gamma,
                loss_with_no_bboxes=parser.loss_with_no_bboxes,
                no_bboxes_alpha=parser.no_bboxes_alpha,
                no_bboxes_gamma=parser.no_bboxes_gamma)
        else:
            raise ValueError(
                'The resnext backbone is only supported with --depth 101')

    use_gpu = True

    if parser.model != 'None':
        retinanet = torch.load(parser.model)

    if use_gpu:
        retinanet = retinanet.cuda()

    retinanet = torch.nn.DataParallel(retinanet).cuda()

    retinanet.training = True

    optimizer = optim.Adam(retinanet.parameters(), lr=parser.lr)

    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     patience=3,
                                                     verbose=True)

    loss_hist = collections.deque(maxlen=500)

    retinanet.train()
    retinanet.module.freeze_bn()

    print('Num training images: {}'.format(len(dataset_train)))

    for epoch_num in range(parser.epochs):

        retinanet.train()
        retinanet.module.freeze_bn()
        print_activate = 0
        epoch_loss = []
        for iter_num, data in enumerate(dataloader_train):
            try:
                optimizer.zero_grad()
                classification_loss, regression_loss = retinanet(
                    [data['img'].cuda().float(), data['annot']])

                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()

                loss = classification_loss + regression_loss
                #print(classification_loss, regression_loss)
                if bool(loss == 0):
                    continue

                loss.backward()
                print_activate += 1
                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)

                optimizer.step()

                loss_hist.append(float(loss))

                epoch_loss.append(float(loss))
                if print_activate % 15 == 0:
                    print(
                        'Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'
                        .format(epoch_num, iter_num,
                                float(classification_loss),
                                float(regression_loss), np.mean(loss_hist)))

                del loss
                del classification_loss
                del regression_loss
            except Exception as e:
                print(e)
                continue

        scheduler.step(np.mean(epoch_loss))

        torch.save(
            retinanet.module,
            '{}_retinanet_resnext_v4_{}.pt'.format(parser.dataset, epoch_num))

        if parser.dataset == 'csv' and parser.csv_val is not None:

            print('Evaluating dataset')

            mAP = csv_eval.evaluate(dataset_val, retinanet)

    retinanet.eval()

    torch.save(retinanet, 'model_final.pt')
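
A hedged invocation sketch for the script above; all paths and values are hypothetical placeholders:

if __name__ == '__main__':
    # Hypothetical annotation files; --zoom_range takes two floats (min, max).
    main(['--dataset', 'csv',
          '--csv_train', 'train_annots.csv',
          '--csv_classes', 'classes.csv',
          '--depth', '50',
          '--batch_size', '4',
          '--zoom_range', '-0.1', '0.1'])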