Example #1
def main(args):

    create_time = time.strftime('%Y%m%d_%H%M', time.localtime(time.time()))
    save_folder_path = os.path.join(args.save_folder, create_time)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # dataset = customDetection(root = args.image_root,
    #                           json_path = args.annotation,
    #                           transform = BaseTransform(img_size = args.image_size),
    #                           target_transform = customAnnotationTransform())

    dataset = COCODetection(root=args.image_root,
                            annotation_json=args.annotation,
                            transform=BaseTransform(img_size=args.image_size),
                            target_transform=COCOAnnotationTransform())

    dataloader = DataLoader(dataset=dataset,
                            batch_size=4,
                            shuffle=True,
                            collate_fn=detection_collate)

    n_classes = dataset.get_class_number() + 1
    print("Detect class number: {}".format(n_classes))

    # write the category-id to label-name map
    dataset.get_class_map()

    model = mobilenetv3(n_classes=n_classes)
    ssd = ssd_mobilenetv3(model, n_classes)

    if args.pretrain_model_path:
        ssd.load_state_dict(torch.load(args.pretrain_model_path))

    # Initialize the optimizer with twice the base learning rate for biases,
    # as in the original SSD Caffe repo. Collect parameters from the full SSD
    # network so the detection heads are trained too, not just the backbone.
    biases = list()
    not_biases = list()
    for param_name, param in ssd.named_parameters():
        if param.requires_grad:
            if param_name.endswith('.bias'):
                biases.append(param)
            else:
                not_biases.append(param)

    optimizer = torch.optim.SGD(params=[{
        'params': biases,
        'lr': 2 * args.learning_rate
    }, {
        'params': not_biases
    }],
                                lr=args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    ssd = ssd.to(device)
    criterion = MultiBoxLossV3(ssd.priors_cxcy, args.threshold,
                               args.neg_pos_ratio).to(device)

    print(f"epochs: {args.epochs}")
    for param_group in optimizer.param_groups:
        optimizer.param_groups[1]['lr'] = args.learning_rate
    print(f"learning rate. The new LR is {optimizer.param_groups[1]['lr']}")

    scheduler = ReduceLROnPlateau(optimizer,
                                  mode='min',
                                  factor=0.1,
                                  patience=15,
                                  verbose=True,
                                  threshold=0.00001,
                                  threshold_mode='rel',
                                  cooldown=0,
                                  min_lr=0,
                                  eps=1e-08)

    n_train = min(len(dataset), 5000)
    global_step = 0
    writer = SummaryWriter()

    for epoch in range(args.epochs):
        mean_loss = 0
        mean_count = 0
        ssd.train()
        with tqdm(total=n_train,
                  desc=f"{epoch + 1} / {args.epochs}",
                  unit='img') as pbar:
            for img, target in dataloader:
                img = img.to(device)
                # move each annotation to the device once, then split into
                # box coordinates and class labels
                target = [anno.to(device) for anno in target]
                boxes = [anno[:, :-1] for anno in target]
                labels = [anno[:, -1] for anno in target]

                predicted_locations, predicted_scores = ssd(img)
                loss = criterion(predicted_locations, predicted_scores,
                                 boxes, labels)
                pbar.set_postfix(loss=float(loss))
                mean_loss += float(loss)
                mean_count += 1
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                pbar.update(img.shape[0])

        scheduler.step(mean_loss / mean_count)
        writer.add_scalar('Train/Loss', float(mean_loss / mean_count),
                          global_step)
        global_step += 1

        if epoch % 10 == 0 or epoch == args.epochs - 1:
            save_model(save_folder_path, ssd, epoch)

    writer.close()
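
Note: both examples hand detection_collate to their DataLoader, but the helper itself is not shown. Below is a minimal sketch of what such a collate function usually does, assuming each dataset item is an (image, annotation) pair where the annotation is an [n_boxes, 5] array (four box coordinates plus a class label); it is an illustration, not the repo's actual implementation.

import torch


def detection_collate(batch):
    """Hypothetical collate_fn sketch: stack fixed-size images into one
    batch tensor, but keep annotations in a per-image list because the
    number of boxes varies between images."""
    images = []
    targets = []
    for image, annotation in batch:
        images.append(image)
        # each annotation row: [xmin, ymin, xmax, ymax, class_label]
        targets.append(torch.FloatTensor(annotation))
    return torch.stack(images, dim=0), targets

The default collate_fn would try to stack the ragged annotation tensors and fail, which is why a custom function like this is needed for detection batches.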
Example #2
def train(args):
    create_time = time.strftime('%Y%m%d_%H%M', time.localtime(time.time()))
    save_folder_path = os.path.join(args.save_folder, create_time)

    if not (args.train_image_folder and args.val_image_folder
            and args.annotation):
        print("train/val image folders and annotation must not be None")
        return

    train_dataset = COCODetection(
        root=args.root,
        image_set=args.train_image_folder,
        annotation_json=args.annotation,
        transform=SSDAugmentation(img_size=args.image_size),
        # transform = BaseTransform(img_size = args.image_size),
        target_transform=COCOAnnotationTransform())

    train_dataloader = DataLoader(dataset=train_dataset,
                                  batch_size=args.batch_size,
                                  shuffle=True,
                                  collate_fn=detection_collate)

    val_dataset = COCODetection(
        root=args.root,
        image_set=args.val_image_folder,
        annotation_json=args.annotation,
        transform=BaseTransform(img_size=args.image_size),
        target_transform=COCOAnnotationTransform())

    n_classes = train_dataset.get_class_size() + 1

    if args.class_map_path:
        train_dataset.get_class_map(args.class_map_path)

    if args.model == "mobilenetv2":
        model = MobileNetv2(
            n_classes=n_classes,
            width_mult=args.width_mult,
            round_nearest=8,
            dropout_ratio=args.dropout_ratio,
            use_batch_norm=True,
        )

        ssd = create_mobilenetv2_ssd_lite(model,
                                          n_classes,
                                          width_mult=args.width_mult,
                                          use_batch_norm=True)

    elif args.model == "mobilenetv3":
        model = MobileNetv3(model_mode=args.model_mode,
                            n_classes=n_classes,
                            width_mult=args.width_mult,
                            dropout_ratio=args.dropout_ratio)

        ssd = create_mobilenetv3_ssd_lite(model,
                                          n_classes,
                                          model_mode=args.model_mode)

    else:
        print("model structure only accept mobilenetv2 or mobilenetv3")
        return
    print("builded ssd module")

    if GPU:
        import torch.backends.cudnn as cudnn
        model.cuda()
        ssd.cuda()
        cudnn.benchmark = True

    if args.pretrain_model:
        ssd.load_state_dict(
            torch.load(args.pretrain_model, map_location=torch.device('cpu')))

    elif args.pretrain_tfmodel and args.pretrain_tfmodel_weight_list:
        ssd_state_dict = ssd.state_dict()
        tf_weights_dict = load_tf_weights(args, ssd_state_dict)
        ssd.load_state_dict(tf_weights_dict)

    optimizer = optim.Adam(ssd.parameters(),
                           lr=args.learning_rate,
                           weight_decay=args.weight_decay)

    criterion = MultiBoxLoss(n_classes,
                             overlap_thresh=args.overlap_threshold,
                             prior_for_matching=True,
                             bkg_label=0,
                             neg_mining=True,
                             neg_pos=args.neg_pos_ratio,
                             neg_overlap=0.5,
                             encode_target=False)
    with torch.no_grad():
        if args.model == "mobilenetv2":
            prior_box = PriorBox(MOBILEV2_300)
        elif args.model == "mobilenetv3":
            prior_box = PriorBox(MOBILEV3_300)

        priors = prior_box.forward()
        if GPU:
            # the loss compares priors against GPU tensors, so move them too
            priors = priors.cuda()
        print("created default bounding boxes")

    n_train = min(len(train_dataset), 5000)
    n_val = min(len(val_dataset), 1000)
    global_step = 0
    val_global_step = 0
    writer = SummaryWriter(log_dir=args.summary_path)
    for epoch in range(args.epochs):
        mean_loss_conf = 0
        mean_loss_loc = 0
        batch_count = 0
        inference_count = 0

        ssd.train()
        with tqdm(total=n_train,
                  desc=f"{epoch + 1} / {args.epochs}",
                  unit='img') as pbar:
            for img, target in train_dataloader:

                # Variable is a no-op since PyTorch 0.4; plain tensors suffice
                if GPU:
                    img = img.cuda()
                    target = [anno.cuda() for anno in target]

                optimizer.zero_grad()

                inference = ssd(img)

                loss_loc, loss_conf = criterion(inference, priors, target)
                writer.add_scalar('Train/location_loss', float(loss_loc),
                                  global_step)
                writer.add_scalar('Train/confidence_loss', float(loss_conf),
                                  global_step)

                pbar.set_postfix(
                    **{
                        "location loss": float(loss_loc),
                        "confidence loss": float(loss_conf)
                    })

                mean_loss_loc += float(loss_loc)
                mean_loss_conf += float(loss_conf)
                batch_count += 1

                total_loss = loss_loc + loss_conf
                total_loss.backward()

                # optional: clip gradients
                # clip_grad_norm_(ssd.parameters(), 0.1)

                optimizer.step()
                pbar.update(img.shape[0])
                global_step += 1
                inference_count += img.shape[0]

                if inference_count > n_train:
                    break
            # losses are accumulated once per batch, so average over batches
            pbar.set_postfix(
                **{
                    "location loss": float(mean_loss_loc / batch_count),
                    "confidence loss": float(mean_loss_conf / batch_count)
                })

        ssd.eval()
        val_mean_loss_loc = 0
        val_mean_loss_conf = 0
        # validation needs no gradients
        with torch.no_grad(), tqdm(total=n_val, desc="Validation",
                                   unit="img") as vpbar:
            for i in range(n_val):
                img = val_dataset.get_image(i)
                # read the original size before resizing so the annotation
                # is scaled against the source image, not the resized one
                height, width, _ = img.shape
                target = val_dataset.get_annotation(i, width, height)
                img = cv2.resize(img, (args.image_size, args.image_size))

                img = torch.from_numpy(
                    np.expand_dims(img.transpose(2, 0, 1),
                                   0)).to(dtype=torch.float32)
                target = torch.FloatTensor(target).unsqueeze(0)
                if GPU:
                    img = img.cuda()
                    target = target.cuda()

                inference = ssd(img)
                loss_loc, loss_conf = criterion(inference, priors, target)

                val_mean_loss_loc += float(loss_loc)
                val_mean_loss_conf += float(loss_conf)
                vpbar.set_postfix(
                    **{
                        'location loss': float(loss_loc),
                        'confidence loss': float(loss_conf)
                    })
                vpbar.update(1)

            vpbar.set_postfix(
                **{
                    'location loss': float(val_mean_loss_loc / n_val),
                    'confidence loss': float(val_mean_loss_conf / n_val)
                })
            writer.add_scalar('Test/location_loss',
                              float(val_mean_loss_loc / n_val),
                              val_global_step)
            writer.add_scalar('Test/confidence_loss',
                              float(val_mean_loss_conf / n_val),
                              val_global_step)
        val_global_step += 1

        if epoch % 10 == 0 or epoch == args.epochs - 1:
            save_model(save_folder_path, ssd, epoch)
    writer.close()
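
Note: save_model(save_folder_path, ssd, epoch) is called in both examples but never defined. A plausible sketch, assuming it creates the timestamped folder on first use and writes the network's state_dict with the epoch in the filename; the layout below is a guess, not the repo's code.

import os
import torch


def save_model(save_folder_path, model, epoch):
    # hypothetical helper: persist one checkpoint per call
    os.makedirs(save_folder_path, exist_ok=True)
    checkpoint_path = os.path.join(save_folder_path, f"ssd_epoch_{epoch}.pth")
    torch.save(model.state_dict(), checkpoint_path)

Saving the state_dict rather than the whole module keeps the checkpoint loadable via ssd.load_state_dict, matching how both examples restore pretrained weights.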