Example #1
def train(net, lr, trainloader, epoch):
  """
  Train SSD

  @args
    net: (nn.Module) network
    lr: (float) learning rate
    trainloader: (DataLoader) dataloader
    epoch: (int) training epoch
  """
  net.train()
  optimizer = optim.SGD(net.parameters(), lr=lr, momentum=0.9, weight_decay=1e-4)  # use the lr argument, not the global args
  criterion = MultiBoxLoss(num_classes=config[args.dataset]['num_classes']+1)

  progress_bar = ProgressBar(total=len(trainloader))
  train_loss = 0
  torch.set_printoptions(threshold=10000)
  for batch_idx, (images, loc_targets, conf_targets) in enumerate(trainloader):
    images = Variable(images.cuda())
    loc_targets = Variable(loc_targets.cuda())
    conf_targets = Variable(conf_targets.cuda())

    optimizer.zero_grad()
    loc_preds, conf_preds = net(images)
    loc_loss, conf_loss, loss = criterion(loc_preds, loc_targets, conf_preds, conf_targets)
    loss.backward()
    optimizer.step()

    step = batch_idx + epoch * len(trainloader)
    writer.add_scalar('train/loss_loc', loc_loss, step)
    writer.add_scalar('train/loss_conf', conf_loss, step)
    writer.add_scalar('train/loss_total', loss, step)

    train_loss += loss.item()
    progress_bar.move(leftmsg="training epoch " + str(epoch), rightmsg="loss: %.6f" % (train_loss/(batch_idx+1)))
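
# A minimal driver sketch for the `train` function above. The snippet relies
# on module-level globals (`args`, `config`, `writer`, `ProgressBar`); the
# loop below assumes those, plus `net` and `trainloader`, are already built:
if __name__ == '__main__':
    for epoch in range(args.epochs):  # `args.epochs` is an assumed field
        train(net, args.lr, trainloader, epoch)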
Example #2
    def __init__(self, net, checkpoint, cfg):
        super().__init__("TrackerDefault")

        self.cfg = cfg

        self.net = net
        if checkpoint is not None:
            utils.load_checkpoint(checkpoint, self.net)

        self.net.eval()

        self.anchors = utils.generate_anchors(cfg)

        if torch.cuda.is_available():
            self.net.cuda()
            self.anchors = self.anchors.cuda()

        self.z_transform = Compose([
            ToAbsoluteCoords(),
            Crop(context_amount=cfg.TRAIN.CROP_CONTEXT_AMOUNT_Z, make_square=False),
            ToPercentCoords(),
            Resize(cfg.MODEL.Z_SIZE),
        ])

        self.x_crop = Crop(context_amount=cfg.TRAIN.CROP_CONTEXT_AMOUNT_X, return_rect=True, make_square=True)
        self.x_resize = Resize(size=cfg.MODEL.X_SIZE)

        self.z_crop = Crop(context_amount=cfg.TRAIN.CROP_CONTEXT_AMOUNT_Z, return_rect=True, make_square=False)
        self.z_resize = Resize(size=cfg.MODEL.Z_SIZE)

        self.criterion = MultiBoxLoss(self.anchors, self.cfg)
Example #3
def train():
    print('start training ...........')
    batch_size = 32
    num_epochs = 600
    lr = 0.001

    device = torch.device("cuda:0" if (torch.cuda.is_available()) else "cpu")
    model = SSD_VGG(num_classes=21, device=device, freeze=False)
    # model.load_state_dict(torch.load('output/weight.pth', map_location=device))
    train_loader, val_loader = get_loader(batch_size=batch_size)

    optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9, nesterov=True, weight_decay=0.0005)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=50)
    criterion = MultiBoxLoss(priors_cxcy=model.priors_cxcy, device=device)

    train_losses, val_losses = [], []
    for epoch in range(num_epochs):
        train_epoch_loss = fit(epoch, model, optimizer, criterion, device, train_loader, phase='training')
        val_epoch_loss = fit(epoch, model, optimizer, criterion, device, val_loader, phase='validation')
        print('-----------------------------------------')

        if epoch == 0 or val_epoch_loss <= np.min(val_losses):
            torch.save(model.state_dict(), 'output/weight.pth')

        # if epoch == 0 or train_epoch_loss <= np.min(train_losses):
        #     torch.save(model.state_dict(), 'output/weight.pth')

        train_losses.append(train_epoch_loss)
        val_losses.append(val_epoch_loss)

        write_figure('output', train_losses, val_losses)
        write_log('output', epoch, train_epoch_loss, val_epoch_loss)

        scheduler.step(val_epoch_loss)
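
# The `fit` helper called above is not shown. A plausible sketch under
# assumptions: the loaders yield (images, boxes, labels) and the criterion
# follows the (predicted_locs, predicted_scores, boxes, labels) convention
# used elsewhere in these examples:
def fit(epoch, model, optimizer, criterion, device, loader, phase='training'):
    model.train() if phase == 'training' else model.eval()
    running_loss = 0.0
    with torch.set_grad_enabled(phase == 'training'):
        for images, boxes, labels in loader:
            images = images.to(device)
            boxes = [b.to(device) for b in boxes]
            labels = [l.to(device) for l in labels]
            predicted_locs, predicted_scores = model(images)
            loss = criterion(predicted_locs, predicted_scores, boxes, labels)
            if phase == 'training':
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
            running_loss += loss.item()
    print('{} loss at epoch {}: {:.4f}'.format(phase, epoch, running_loss / len(loader)))
    return running_loss / len(loader)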
Example #4
def test(net, testloader, epoch):
  net.eval()
    
  criterion = MultiBoxLoss(num_classes=config[args.dataset]['num_classes']+1)
  progress_bar = ProgressBar(total=len(testloader))

  test_loss = 0
  for batch_idx, (images, loc_targets, conf_targets) in enumerate(testloader):
    images = Variable(images.cuda())
    loc_targets = Variable(loc_targets.cuda())
    conf_targets = Variable(conf_targets.cuda())
    
    with torch.no_grad():
      loc_preds, conf_preds = net(images)
      _, _, loss = criterion(loc_preds, loc_targets, conf_preds, conf_targets)

    test_loss += loss.item()
    progress_bar.move(leftmsg="test epoch " + str(epoch), rightmsg="loss: %.6f" % (test_loss/(batch_idx+1)))

  writer.add_scalar('test/loss', test_loss / len(testloader), epoch)  # log the epoch-average loss, not the last batch's
  return test_loss / len(testloader)  # average
Example #5
    def __init__(self, net_path=None, **kwargs):
        super(TrackerSiamRPN, self).__init__(name='SiamRPN',
                                             is_deterministic=True)
        # setup GPU device if available
        self.cuda = torch.cuda.is_available()
        self.device = torch.device('cuda:0' if self.cuda else 'cpu')
        # setup model
        self.net = SiamRPN()
        if self.cuda:
            self.net = self.net.cuda()

        if net_path is not None:
            self.net.load_state_dict(
                torch.load(net_path,
                           map_location=lambda storage, loc: storage))
        #self.net = self.net.to(self.device)
        # setup loss and optimizer
        self.criterion = MultiBoxLoss()

        self.optimizer = torch.optim.SGD(self.net.parameters(),
                                         lr=config.lr,
                                         momentum=config.momentum,
                                         weight_decay=config.weight_decay)
Example #6
def main(args):
    # Model parameters
    # Not too many here since the SSD300 has a very specific structure
    with open(args.config_file_path, "r") as fp:
        config = json.load(fp)

    n_classes = len(label_map)  # number of different types of objects
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    #Mobilenetv2
    #normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
    #                                 std=[0.229, 0.224, 0.225])

    # Learning parameters
    checkpoint = None  # path to model checkpoint, None if none
    batch_size = config['batch_size']  # batch size
    start_epoch = 0  # start at this epoch
    epochs = config['n_epochs']  # number of epochs to run without early-stopping
    epochs_since_improvement = 0  # number of epochs since there was an improvement in the validation metric
    best_loss = 100.  # assume a high loss at first
    workers = 2  # number of workers for loading data in the DataLoader
    lr = config['lr']  # learning rate
    momentum = 0.9  # momentum
    weight_decay = config['weight_decay']  # weight decay
    grad_clip = None  # gradient-clipping threshold; None disables clipping
    backbone_network = config['backbone_network']

    model = SSD(num_classes=n_classes, backbone_network=backbone_network)
    # Initialize the optimizer, with twice the default learning rate for biases, as in the original Caffe repo
    biases = list()
    not_biases = list()
    param_names_biases = list()
    param_names_not_biases = list()
    for param_name, param in model.named_parameters():
        if param.requires_grad:
            if param_name.endswith('.bias'):
                biases.append(param)
                param_names_biases.append(param_name)
            else:
                not_biases.append(param)
                param_names_not_biases.append(param_name)
    optimizer = torch.optim.SGD(params=[{
        'params': biases,
        'lr': 2 * lr
    }, {
        'params': not_biases
    }],
                                lr=lr,
                                momentum=momentum,
                                weight_decay=weight_decay)

    model = model.to(device)
    criterion = MultiBoxLoss(priors_cxcy=model.priors).to(device)

    #voc07_path = 'VOCdevkit/VOC2007'
    voc07_path = config['voc07_path']

    #voc12_path = 'VOCdevkit/VOC2012'
    # voc12_path = config['voc12_path']
    #from utils import create_data_lists

    create_data_lists(voc07_path, output_folder=config['data_folder'])

    #data_folder = 'VOC/VOCdevkit/'
    data_folder = config['data_folder']
    train_dataset = PascalVOCDataset(data_folder,
                                     split='train',
                                     keep_difficult=keep_difficult)
    val_dataset = PascalVOCDataset(data_folder,
                                   split='test',
                                   keep_difficult=keep_difficult)

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=batch_size,
        shuffle=True,
        collate_fn=train_dataset.collate_fn,
        num_workers=workers,
        pin_memory=True)  # note that we're passing the collate function here
    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=batch_size,
                                             shuffle=True,
                                             collate_fn=val_dataset.collate_fn,
                                             num_workers=workers,
                                             pin_memory=True)

    print(start_epoch)
    for epoch in range(start_epoch, epochs):
        # Paper describes decaying the learning rate at the 80000th, 100000th, 120000th 'iteration', i.e. model update or batch
        # The paper uses a batch size of 32, which means there were about 517 iterations in an epoch
        # Therefore, to find the epochs to decay at, you could do,
        # if epoch in {80000 // 517, 100000 // 517, 120000 // 517}:
        #     adjust_learning_rate(optimizer, 0.1)

        # In practice, I just decayed the learning rate when loss stopped improving for long periods,
        # and I would resume from the last best checkpoint with the new learning rate,
        # since there's no point in resuming at the most recent and significantly worse checkpoint.
        # So, when you're ready to decay the learning rate, just set checkpoint = 'BEST_checkpoint_ssd300.pth.tar' above
        # and have adjust_learning_rate(optimizer, 0.1) BEFORE this 'for' loop

        # One epoch's training
        train(train_loader=train_loader,
              model=model,
              criterion=criterion,
              optimizer=optimizer,
              epoch=epoch,
              grad_clip=grad_clip)

        # One epoch's validation
        val_loss = validate(val_loader=val_loader,
                            model=model,
                            criterion=criterion)

        # Did validation loss improve?
        is_best = val_loss < best_loss
        best_loss = min(val_loss, best_loss)

        if not is_best:
            epochs_since_improvement += 1
            print("\nEpochs since last improvement: %d\n" %
                  (epochs_since_improvement, ))

        else:
            epochs_since_improvement = 0

        # Save checkpoint
        save_checkpoint(epoch, epochs_since_improvement, model, optimizer,
                        val_loss, best_loss, is_best)
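
# `save_checkpoint` is called above but not defined in this snippet. A sketch
# consistent with the call site and with the checkpoint fields read back in
# the DSOD example below; the filenames are assumptions:
def save_checkpoint(epoch, epochs_since_improvement, model, optimizer,
                    val_loss, best_loss, is_best):
    state = {'epoch': epoch,
             'epochs_since_improvement': epochs_since_improvement,
             'model': model,
             'optimizer': optimizer,
             'val_loss': val_loss,
             'best_loss': best_loss}
    torch.save(state, 'checkpoint_ssd300.pth.tar')
    if is_best:
        torch.save(state, 'BEST_checkpoint_ssd300.pth.tar')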
Example #7
def train():
    """
    Introduction
    ------------
        Train the RetinaNet model
    """
    train_transform = Augmentation(size=config.image_size)
    # train_dataset = COCODataset(config.coco_train_dir, config.coco_train_annaFile, config.coco_label_file, training = True, transform = train_transform)
    from VOCDataset import build_vocDataset
    train_dataset = build_vocDataset(config.voc_root)
    train_dataloader = DataLoader(train_dataset,
                                  batch_size=config.train_batch,
                                  shuffle=True,
                                  num_workers=2,
                                  collate_fn=train_dataset.collate_fn)
    print("training on {} samples".format(train_dataset.__len__()))
    net = RetinaNet(config.num_classes, pre_train_path=config.resnet50_path)
    net.cuda()
    optimizer = optim.SGD(net.parameters(),
                          lr=config.learning_rate,
                          momentum=0.9,
                          weight_decay=1e-4)
    criterion = MultiBoxLoss(alpha=config.focal_alpha,
                             gamma=config.focal_gamma,
                             num_classes=config.num_classes)
    anchors = Anchor(config.anchor_areas, config.aspect_ratio,
                     config.scale_ratios)
    anchor_boxes = anchors(input_size=config.image_size)
    for epoch in range(config.Epochs):
        batch_time, loc_losses, conf_losses = AverageTracker(), AverageTracker(), AverageTracker()
        net.train()
        net.freeze_bn()
        end = time.time()
        for index, (image, gt_boxes, labels) in enumerate(train_dataloader):
            loc_targets, cls_targets = [], []
            image = image.cuda()
            loc_preds, cls_preds = net(image)
            batch_num = image.shape[0]
            for idx in range(batch_num):
                gt_box = gt_boxes[idx]  # index with the inner-loop counter, not the outer batch index
                label = labels[idx]
                loc_target, cls_target = encode(anchor_boxes, gt_box, label)
                loc_targets.append(loc_target)
                cls_targets.append(cls_target)
            loc_targets = torch.stack(loc_targets).cuda()
            cls_targets = torch.stack(cls_targets).cuda()
            loc_loss, cls_loss = criterion(loc_preds, loc_targets, cls_preds,
                                           cls_targets)
            loss = loc_loss + cls_loss
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            loc_losses.update(loc_loss.item(), image.size(0))
            conf_losses.update(cls_loss.item(), image.size(0))
            batch_time.update(time.time() - end)
            end = time.time()
            if index % config.print_freq == 0:
                print(
                    'Epoch: {}/{} Batch: {}/{} loc Loss: {:.4f} {:.4f} conf loss: {:.4f} {:.4f} Time: {:.4f} {:.4f}'
                    .format(epoch, config.Epochs, index, len(train_dataloader),
                            loc_losses.val, loc_losses.avg, conf_losses.val,
                            conf_losses.avg, batch_time.val, batch_time.avg))
        if epoch % config.save_freq == 0:
            print('save model')
            torch.save(
                net.state_dict(),
                config.model_dir + 'train_model_epoch{}.pth'.format(epoch + 1))
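
# `AverageTracker` is used above but not defined here. A minimal sketch
# consistent with the .update()/.val/.avg usage:
class AverageTracker:
    def __init__(self):
        self.val, self.sum, self.count, self.avg = 0.0, 0.0, 0, 0.0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count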
Example #8
def train(args):
    create_time = time.strftime('%Y%m%d_%H%M', time.localtime(time.time()))
    save_folder_path = os.path.join(args.save_folder, create_time)

    # n_classes = [20, 80][args.dataset == 'COCO']
    # n_classes = 91

    if not (args.train_image_folder and args.val_image_folder
            and args.annotation):
        print("train/val image folder and annotation should not be None")
        return

    train_dataset = COCODetection(
        root=args.root,
        image_set=args.train_image_folder,
        annotation_json=args.annotation,
        transform=SSDAugmentation(img_size=args.image_size),
        # transform = BaseTransform(img_size = args.image_size),
        target_transform=COCOAnnotationTransform())

    train_dataloader = DataLoader(dataset=train_dataset,
                                  batch_size=args.batch_size,
                                  shuffle=True,
                                  collate_fn=detection_collate)

    val_dataset = COCODetection(
        root=args.root,
        image_set=args.val_image_folder,
        annotation_json=args.annotation,
        transform=BaseTransform(img_size=args.image_size),
        target_transform=COCOAnnotationTransform())

    n_classes = train_dataset.get_class_size() + 1

    if args.class_map_path:
        train_dataset.get_class_map(args.class_map_path)

    if args.model == "mobilenetv2":
        model = MobileNetv2(
            n_classes=n_classes,
            width_mult=args.width_mult,
            round_nearest=8,
            dropout_ratio=args.dropout_ratio,
            use_batch_norm=True,
        )

        ssd = create_mobilenetv2_ssd_lite(model,
                                          n_classes,
                                          width_mult=args.width_mult,
                                          use_batch_norm=True)

    elif args.model == "mobilenetv3":
        model = MobileNetv3(model_mode=args.model_mode,
                            n_classes=n_classes,
                            width_mult=args.width_mult,
                            dropout_ratio=args.dropout_ratio)

        ssd = create_mobilenetv3_ssd_lite(model,
                                          n_classes,
                                          model_mode=args.model_mode)

    else:
        print("model structure only accept mobilenetv2 or mobilenetv3")
        return
    print("builded ssd module")

    if GPU:
        import torch.backends.cudnn as cudnn
        model.cuda()
        ssd.cuda()
        cudnn.benchmark = True

    if args.pretrain_model:
        ssd.load_state_dict(
            torch.load(args.pretrain_model, map_location=torch.device('cpu')))

    elif args.pretrain_tfmodel and args.pretrain_tfmodel_weight_list:
        ssd_state_dict = ssd.state_dict()
        tf_weights_dict = load_tf_weights(args, ssd_state_dict)
        ssd.load_state_dict(tf_weights_dict)

    optimizer = optim.Adam(ssd.parameters(),
                           lr=args.learning_rate,
                           weight_decay=args.weight_decay)

    criterion = MultiBoxLoss(n_classes,
                             overlap_thresh=args.overlap_threshold,
                             prior_for_matching=True,
                             bkg_label=0,
                             neg_mining=True,
                             neg_pos=args.neg_pos_ratio,
                             neg_overlap=0.5,
                             encode_target=False)
    with torch.no_grad():

        if args.model == "mobilenetv2":
            prior_box = PriorBox(MOBILEV2_300)

        elif args.model == "mobilenetv3":
            prior_box = PriorBox(MOBILEV3_300)

        priors = Variable(prior_box.forward())
        print("created default bbox")

    n_train = min(len(train_dataset), 5000)
    n_val = min(len(val_dataset), 1000)
    global_step = 0
    val_global_step = 0
    writer = SummaryWriter(log_dir=args.summary_path)
    for epoch in range(args.epochs):
        mean_loss_conf = 0
        mean_loss_loc = 0
        inference_count = 0

        ssd.train()
        with tqdm(total=n_train,
                  desc=f"{epoch + 1} / {args.epochs}",
                  unit='img') as pbar:
            for img, target in train_dataloader:

                if GPU:
                    img = Variable(img.cuda())
                    target = [Variable(anno.cuda()) for anno in target]
                else:
                    img = Variable(img)
                    target = [Variable(anno) for anno in target]

                optimizer.zero_grad()

                inference = ssd(img)

                loss_loc, loss_conf = criterion(inference, priors, target)
                writer.add_scalar('Train/location_loss', float(loss_loc),
                                  global_step)
                writer.add_scalar('Train/confidence_loss', float(loss_conf),
                                  global_step)

                pbar.set_postfix(
                    **{
                        "location loss": float(loss_loc),
                        "confidence loss": float(loss_conf)
                    })

                mean_loss_loc += float(loss_loc)
                mean_loss_conf += float(loss_conf)

                total_loss = loss_loc + loss_conf
                total_loss.backward()

                # # clip gradient
                # # clip_grad_norm_(net.parameters(), 0.1)

                optimizer.step()
                pbar.update(img.shape[0])
                global_step += 1
                inference_count += img.shape[0]

                if inference_count > n_train: break
            pbar.set_postfix(
                **{
                    "location loss": float(mean_loss_loc / n_train),
                    "confidence loss": float(mean_loss_conf / n_train)
                })

        ssd.eval()
        val_mean_loss_loc = 0
        val_mean_loss_conf = 0
        with tqdm(total=n_val, desc="Validation", unit="img") as vpbar:
            for i in range(n_val):
                img = val_dataset.get_image(i)
                img = cv2.resize(img, (args.image_size, args.image_size))
                height, width, _ = img.shape
                target = val_dataset.get_annotation(i, width, height)

                if GPU:
                    img = torch.from_numpy(
                        np.expand_dims(img.transpose(2, 0, 1),
                                       0)).to(dtype=torch.float32).cuda()
                    target = torch.FloatTensor(target).unsqueeze(0).cuda()
                else:
                    img = torch.from_numpy(
                        np.expand_dims(img.transpose(2, 0, 1),
                                       0)).to(dtype=torch.float32)
                    target = torch.FloatTensor(target).unsqueeze(0)

                inference = ssd(img)
                loss_loc, loss_conf = criterion(inference, priors, target)

                val_mean_loss_loc += float(loss_loc)
                val_mean_loss_conf += float(loss_conf)
                vpbar.set_postfix(
                    **{
                        'location loss': float(loss_loc),
                        'confidence loss': float(loss_conf)
                    })
                vpbar.update(1)

            vpbar.set_postfix(
                **{
                    'location loss': float(val_mean_loss_loc / n_val),
                    'confidence loss': float(val_mean_loss_conf / n_val)
                })
            writer.add_scalar('Test/location_loss',
                              float(val_mean_loss_loc / n_val),
                              val_global_step)
            writer.add_scalar('Test/confidence_loss',
                              float(val_mean_loss_conf / n_val),
                              val_global_step)
        val_global_step += 1

        if epoch % 10 == 0 or epoch == args.epochs - 1:
            save_model(save_folder_path, ssd, epoch)
    writer.close()
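
# `save_model` is called above but not shown. A plausible sketch; the
# signature follows the (folder, model, epoch) call site and the filename is
# an assumption:
def save_model(folder, model, epoch):
    os.makedirs(folder, exist_ok=True)
    torch.save(model.state_dict(), os.path.join(folder, 'ssd_epoch_{}.pth'.format(epoch)))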
Example #9
train_set = loader.VOC_loader(root_dir, transform=transform)

# 4. define the data loader
train_loader = torch.utils.data.DataLoader(train_set,
                                           batch_size=32,
                                           collate_fn=train_set.collate_fn,
                                           shuffle=True,
                                           num_workers=0)

# 5. define the model
net = SSD().to(device)
net.train()

# 6. define the loss
criterion = MultiBoxLoss().to(device)

# 7. define the optimizer
optimizer = optim.Adam(net.parameters(), lr=0.001)
total_step = len(train_loader)

# 8. train
for epoch in range(30):

    epoch_time = time.time()
    for i, (images, labels) in enumerate(train_loader):

        images = images.to(device)
        labels = [l.to(device) for l in labels]
        # labels = labels.to(device)
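
        # The snippet is truncated here. A typical continuation of the loop,
        # assuming this repo's MultiBoxLoss takes (loc_preds, conf_preds,
        # targets); the exact signature is not shown:
        loc_preds, conf_preds = net(images)
        loss = criterion(loc_preds, conf_preds, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()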
Example #10
            # Filter top k objects that have largest confidence score
            if boxes[i].size(0) > top_k:
                scores[i], sort_ind = scores[i].sort(dim=0, descending=True)
                scores[i] = scores[i][:top_k]  # (top_k)
                boxes[i] = boxes[i][sort_ind[:top_k]]  # (top_k, 4)
                labels[i] = labels[i][sort_ind[:top_k]]  # (top_k)

        return boxes, labels, scores
    
    
    def inference(self, images, score_threshold, iou_threshold, top_k):
        ''' images: tensor size (N, 3, 300, 300), normalized
        '''
        predicted_offsets, predicted_scores = self.forward(images)
        return self.post_process_top_k(predicted_offsets, predicted_scores, score_threshold, iou_threshold, top_k)
        
    
if __name__ == "__main__":
    from loss import MultiBoxLoss
    torch.set_grad_enabled(False)
    
    MySSD300 = SSD300(n_classes=21, vgg16_dir='models/')
    loss_func = MultiBoxLoss(priors_cxcy=MySSD300.priors_cxcy, threshold=0.5, neg_pos_ratio=3, alpha=1.)
    
    #loss = loss_func.forward(predicted_offsets, predicted_scores, boxes, labels)
    #print(loss.item())

    # test detect objects
    #boxes, labels, scores = MySSD300.detect_objects(predicted_offsets, predicted_scores, score_threshold=0.6, iou_threshold=0.5)
    #breakpoint()
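
    # A quick smoke test with random tensors; the call matches the commented
    # line above, and the shapes assume SSD300's 8732 priors and 21 classes:
    predicted_offsets = torch.randn(1, 8732, 4)
    predicted_scores = torch.randn(1, 8732, 21)
    boxes = [torch.FloatTensor([[0.1, 0.1, 0.5, 0.5]])]
    labels = [torch.LongTensor([3])]
    loss = loss_func(predicted_offsets, predicted_scores, boxes, labels)
    print(loss.item())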
Example #11
def main(opt):
    """
    Training and validation.
    """
    global epochs_since_improvement, start_epoch, label_map, best_loss, epoch, checkpoint, lr_scheduler
    epochs_since_improvement = opt['epochs_since_improvement']
    start_epoch = opt['start_epoch']
    best_loss = opt['best_loss']
    checkpoint = opt['checkpoint']
    lr_scheduler = opt['lr_scheduler']

    batch_size = opt['batch_size']
    epochs = opt['epochs']
    lr = opt['lr']
    momentum = opt['momentum']
    weight_decay = opt['weight_decay']
    grad_clip = opt['grad_clip']
    workers = opt['workers']
    print_freq = opt['print_freq']

    root = opt['root']

    # Initialize model or load checkpoint
    if checkpoint is None:
        model = DSOD(n_classes=n_classes)
        # Initialize the optimizer, with twice the default learning rate for biases, as in the original Caffe repo
        biases = list()
        not_biases = list()
        for param_name, param in model.named_parameters():
            if param.requires_grad:
                if param_name.endswith('.bias'):
                    biases.append(param)
                else:
                    not_biases.append(param)
        optimizer = torch.optim.SGD(params=[{
            'params': biases,
            'lr': 2 * lr
        }, {
            'params': not_biases
        }],
                                    lr=lr,
                                    momentum=momentum,
                                    weight_decay=weight_decay)

    else:
        checkpoint = torch.load(checkpoint)
        start_epoch = checkpoint['epoch'] + 1
        epochs_since_improvement = checkpoint['epochs_since_improvement']
        best_loss = checkpoint['best_loss']
        print(
            '\nLoaded checkpoint from epoch %d. Best loss so far is %.3f.\n' %
            (start_epoch, best_loss))
        model = checkpoint['model']

        # optimizer = checkpoint['optimizer']
        # or
        # Initialize the optimizer, with twice the default learning rate for biases, as in the original Caffe repo
        optimizer = torch.optim.SGD(model.parameters(),
                                    lr=lr,
                                    momentum=momentum,
                                    weight_decay=weight_decay)

        print('Learning Rate: ', optimizer.param_groups[-1]['lr'])

    lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                              'min',
                                                              factor=0.5,
                                                              patience=20,
                                                              verbose=True)
    # Move to default device
    model = model.to(device)
    criterion = MultiBoxLoss(priors_cxcy=model.priors_cxcy,
                             use_focalloss=use_focalloss).to(device)

    # Custom dataloaders
    train_dataset = mydateset(root='../data', transform=True)
    val_dataset = mydateset(root='../data', mode='test')
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=batch_size,
        shuffle=True,
        collate_fn=train_dataset.collate_fn,
        num_workers=workers,
        pin_memory=True)  # note that we're passing the collate function here
    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=batch_size,
                                             shuffle=True,
                                             collate_fn=val_dataset.collate_fn,
                                             num_workers=workers,
                                             pin_memory=True)
    # Epochs
    for epoch in range(start_epoch, epochs):
        # One epoch's training
        train(train_loader=train_loader,
              model=model,
              criterion=criterion,
              optimizer=optimizer,
              epoch=epoch)

        # One epoch's validation
        val_loss = validate(val_loader=val_loader,
                            model=model,
                            criterion=criterion)

        # Did validation loss improve?
        is_best = val_loss < best_loss
        best_loss = min(val_loss, best_loss)

        if lr_scheduler is not None:
            lr_scheduler.step(best_loss)

        if not is_best:
            epochs_since_improvement += 1
            print("\nEpochs since last improvement: %d\n" %
                  (epochs_since_improvement, ))

        else:
            epochs_since_improvement = 0

        # Save checkpoint
        save_checkpoint(epoch, epochs_since_improvement, model, optimizer,
                        val_loss, best_loss, is_best)
Example #12
def main():
    parser = get_parse()

    dist.init_process_group(backend="nccl")
    torch.cuda.set_device(parser.local_rank)

    # Create the data loaders
    dataset = detection_dataset(parser.data_file, parser.classes_file)
    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
    dataloader = DataLoader(dataset, batch_size=parser.batch_size, num_workers=parser.num_workers, collate_fn=collate_fn, pin_memory=True, sampler=sampler)
    print(f"num_clses:{dataset.num_classes()}, num_data: {len(dataset)}")

    # Create the model
    ssd = SSD(dataset.num_classes()+1)
    if parser.model is not None and os.path.isfile(parser.model):
        print("Loading model.")
        # ssd.load_state_dict(torch.load(parser.model))
        d = collections.OrderedDict()
        checkpoint = torch.load(parser.model)
        # strip the "module." prefix that (Distributed)DataParallel prepends to keys
        for key, value in checkpoint.items():
            tmp = key[7:]
            d[tmp] = value
        ssd.load_state_dict(d)
    else:
        print(f"{parser.model} 不存在")

    config["cuda"] = config["cuda"] and torch.cuda.is_available()
    if config["cuda"]:
        ssd = torch.nn.parallel.DistributedDataParallel(ssd.cuda(), device_ids=[parser.local_rank])

    mbox_loss = MultiBoxLoss(config)

    optimizer = optim.Adam(ssd.parameters(), lr=parser.lr, weight_decay=parser.weight_decay)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=1, verbose=True, factor=0.5, threshold=1e-3)

    ssd.train()
    for epoch_num in range(1, parser.epochs+1):
        epoch_loss = []

        t = time.time()
        for iter_num, data in enumerate(dataloader, 1):
            optimizer.zero_grad()

            img_tensor, boxes_tensor = data["img"], data["boxes"]
            if config["cuda"]:
                img_tensor = img_tensor.cuda(non_blocking=True)
                boxes_tensor = boxes_tensor.cuda(non_blocking=True)

            predictions = ssd(img_tensor)
            loc_loss, conf_loss = mbox_loss(predictions, boxes_tensor)
            loss = loc_loss*parser.loss_lamda + conf_loss
            loss.backward()
            torch.nn.utils.clip_grad_norm_(ssd.parameters(), 0.1)
            optimizer.step()

            reduce_conf_loss = reduce_tensor(conf_loss.data)
            reduce_loc_loss = reduce_tensor(loc_loss.data)
            reduce_loss = reduce_conf_loss + reduce_loc_loss

            if parser.local_rank == 0:
                pre_t = t
                t = time.time()
                text = f"[Epoch: {epoch_num}|{parser.epochs}  Iteration: {iter_num}]" \
                    + f"  conf_loss: {reduce_conf_loss.item():1.4f}  loc_loss: {reduce_loc_loss.item():1.4f}"  \
                    + f"  loss: {reduce_loss.item():1.4f}" \
                    + f"  time:{(t-pre_t)*1000:.1f}ms"
                print(text)

                if iter_num % parser.batch_num_log == 0:
                    with open(parser.log_path, "a", encoding="utf-8") as f:
                        f.write(text+"\n")

                if iter_num % parser.batch_num_save == 0:
                    save_model(ssd, "{}/batch.pt".format(parser.save_path))

                epoch_loss.append(float(reduce_loss.data))

            if iter_num % 200 == 0:
                torch.cuda.empty_cache()
        

        if parser.local_rank == 0:
            save_model(ssd, f"{parser.save_path}/epoch.pt")
            scheduler.step(np.mean(epoch_loss))
            print(f"epoch_mean_loss:{np.mean(epoch_loss):.4f}")

    ssd.eval()
    if parser.local_rank == 0:
        save_model(ssd, f"{parser.save_path}/model_final.pt")
Example #13
def main(
    data_folder,
    keep_difficult,
    lp,  # learning_parameters
    device,
):
    """
    Training.
    """
    label_map, rev_label_map, label_color_map = load_maps(
        os.path.join(data_folder, 'label_map.json'))
    checkpoint_path = os.path.join(
        data_folder,
        "checkpoint_ssd300.pkl")  # path to model checkpoint, None if none
    n_classes = len(label_map)  # number of different types of objects

    # Initialize model or load checkpoint
    if checkpoint_path is None or not os.path.exists(checkpoint_path):
        start_epoch = 0
        model = SSD300(n_classes=n_classes)
        # Initialize the optimizer, with twice the default learning rate for biases, as in the original Caffe repo
        biases = list()
        not_biases = list()
        for param_name, param in model.named_parameters():
            if param.requires_grad:
                if param_name.endswith('.bias'):
                    biases.append(param)
                else:
                    not_biases.append(param)
        optimizer = torch.optim.SGD(params=[{
            'params': biases,
            'lr': 2 * lp['lr']
        }, {
            'params': not_biases
        }],
                                    lr=lp['lr'],
                                    momentum=lp['momentum'],
                                    weight_decay=lp['weight_decay'])

    else:
        checkpoint = torch.load(checkpoint_path)
        start_epoch = checkpoint['epoch'] + 1
        print('\nLoaded checkpoint from epoch %d.\n' % start_epoch)
        model = checkpoint['model']
        optimizer = checkpoint['optimizer']

    # Move to default device
    model = model.to(device)
    criterion = MultiBoxLoss(priors_cxcy=model.priors_cxcy, device=device)

    # Custom dataloaders
    train_dataset = PascalVOCDataset(data_folder,
                                     split='train',
                                     keep_difficult=keep_difficult)
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=lp['batch_size'],
        shuffle=True,
        collate_fn=train_dataset.collate_fn,
        num_workers=lp['workers'],
        pin_memory=True)  # note that we're passing the collate function here

    # Calculate total number of epochs to train and the epochs to decay learning rate at (i.e. convert iterations to epochs)
    # To convert iterations to epochs, divide iterations by the number of iterations per epoch
    # The paper trains for 120,000 iterations with a batch size of 32, decays after 80,000 and 100,000 iterations
    epochs = lp['iterations'] // (len(train_dataset) // 32)
    decay_lr_at = [
        it // (len(train_dataset) // 32) for it in lp['decay_lr_at']
    ]

    # Epochs
    for epoch in range(start_epoch, epochs):

        # Decay learning rate at particular epochs
        if epoch in decay_lr_at:
            adjust_learning_rate(optimizer, lp['decay_lr_to'])

        # One epoch's training
        train(lp,
              train_loader=train_loader,
              model=model,
              criterion=criterion,
              optimizer=optimizer,
              epoch=epoch)

        # Save checkpoint
        save_checkpoint(epoch, model, optimizer, checkpoint_path)
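
# `adjust_learning_rate` (called above) is not shown. A sketch matching the
# (optimizer, scale) call sites in these examples:
def adjust_learning_rate(optimizer, scale):
    for param_group in optimizer.param_groups:
        param_group['lr'] = param_group['lr'] * scale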
Example #14
torch.manual_seed(opt.seed)
if opt.cuda:
    assert torch.cuda.is_available(), 'No GPU found, please run without --cuda'
    torch.cuda.manual_seed_all(opt.seed)

# model
model = SSD300(VOC.N_CLASSES)
cfg = model.config

if opt.checkpoint:
    model.load_state_dict(torch.load(opt.checkpoint))
else:
    model.init_parameters(opt.backbone)

encoder = MultiBox(cfg)
criterion = MultiBoxLoss()

# cuda
if opt.cuda:
    model.cuda()
    criterion.cuda()
    cudnn.benchmark = True

# optimizer
optimizer = optim.SGD(model.parameters(),
                      lr=opt.lr,
                      momentum=0.9,
                      weight_decay=5e-4)

# learning rate / iterations
init_lr = cfg.get('init_lr', 1e-3)
Example #15
def main():
    n_classes = len(label_map)
    logger.debug(n_classes)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    checkpoint = configs['checkpoint']
    batch_size = configs['batch_size']
    start_epoch = configs['start_epoch']  # epoch to start from
    epochs = configs['epochs']  # number of epochs for this training run
    epochs_since_improvement = configs['epochs_since_improvement'] 
    best_loss = configs['best_loss']
    num_workers = configs['num_workers']
    lr = configs['lr']
    momentum = configs['momentum']
    weight_decay = configs['weight_decay']
    grad_clip = configs['grad_clip']
    backbone = configs['backbone']
    best_save = configs['best_model']
    save_model = configs['save_model']
    
    model = SSD(class_num=n_classes, backbone=backbone, device=device)
    #model = SSDLite(class_num=n_classes, backbone=backbone, device=device)
    if checkpoint is not None:
        model = load_pretrained(model, checkpoint)  # load the pretrained weights

    data_folder = configs['data_folder']

    val_dataset = Dataset(data_folder, split='test', keep_difficult=keep_difficult)
    train_dataset = Dataset(data_folder, split='train', keep_difficult=keep_difficult)

    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True, collate_fn=train_dataset.collate_fn, num_workers=num_workers, pin_memory=True)
    val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size, shuffle=True, collate_fn=val_dataset.collate_fn, num_workers=num_workers, pin_memory=True)

    biases = list()
    not_biases = list()
    param_names_biases = list()
    param_names_not_biases = list()
    for param_name, param in model.named_parameters():
        if param.requires_grad:
            if param_name.endswith('.bias'):
                biases.append(param)
                param_names_biases.append(param_name)
            else:
                not_biases.append(param)
                param_names_not_biases.append(param_name)
    optimizer = torch.optim.SGD(params=[{'params':biases,'lr': 2*lr}, {'params':not_biases}],
                                lr=lr, momentum=momentum, weight_decay=weight_decay)
    
    model = model.to(device)
    criterion = MultiBoxLoss(priors_cxcy=model.priors).to(device)

    print(start_epoch)
    logger.debug(start_epoch)
    logger.debug(backbone)
    
    for epoch in range(start_epoch, epochs):
        train(train_loader=train_loader, model=model, criterion=criterion, optimizer=optimizer, epoch=epoch, grad_clip=grad_clip)

        val_loss = validate(val_loader=val_loader, model=model, criterion=criterion)

        is_best = val_loss < best_loss
        best_loss = min(val_loss, best_loss)

        if not is_best:
            epochs_since_improvement += 1
            print('\nEpochs since last improvement: %d\n' % (epochs_since_improvement))
            logger.debug('\nEpochs since last improvement: %d\n' % (epochs_since_improvement))
        else:
            epochs_since_improvement = 0
            torch.save(model.state_dict(), best_save)
        
    torch.save(model.state_dict(), save_model)    
    logger.debug("End of training.")
Example #16
# When resuming training, load the parameters given by args['resume']
if args['resume']:
    print('Resuming training, loading {}...'.format(args['resume']))
    net.load_weights(args['save_folder'] + args['resume'])

sources = list()
loc = list()
conf = list()

if args['cuda']:
    net = torch.nn.DataParallel(net)
    cudnn.benchmark = True

# Set up the loss function
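# The positional args correspond to: num_classes, overlap_thresh,
# prior_for_matching, bkg_label, neg_mining, neg_pos (hard-negative ratio),
# neg_overlap, encode_target, use_gpu -- the keyword form of this signature
# appears in the MobileNet-SSD example earlier.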
criterion = MultiBoxLoss(voc_config['num_classes'], 0.5, True, 0, True, 3, 0.5,
                         False, args['cuda'])

# Set up the optimizer
optimizer = optim.SGD(net.parameters(),
                      lr=args['lr'],
                      momentum=args['momentum'],
                      weight_decay=args['weight_decay'])

# Training mode
net.train()
# loss counters
loc_loss = 0
conf_loss = 0
epoch = 0
print('Loading the dataset...')
Example #17
    def __init__(self, net, cfg):

        self.cfg = cfg

        self.net = net
        self.anchors = generate_anchors(cfg)

        if torch.cuda.is_available():
            self.net.cuda()
            self.anchors = self.anchors.cuda()

        # Dataset transform
        transform = [
            Transform(context_amount=cfg.TRAIN.CROP_CONTEXT_AMOUNT_Z, size=cfg.MODEL.Z_SIZE),
            Transform(context_amount=cfg.TRAIN.CROP_CONTEXT_AMOUNT_X, size=cfg.MODEL.X_SIZE,
                      random_translate=True, random_resize=True, motion_blur=True,
                      random_translate_range=cfg.TRAIN.DATA_AUG_TRANSLATE_RANGE,
                      random_resize_scale_min=cfg.TRAIN.DATA_AUG_RESIZE_SCALE_MIN,
                      random_resize_scale_max=cfg.TRAIN.DATA_AUG_RESIZE_SCALE_MAX
                      )
        ]

        # Training dataset
        trackingnet = TrackingNet(cfg.PATH.TRACKINGNET, subset="train", debug_seq=cfg.TRAIN.DEBUG_SEQ)
        imagenet = ImageNetVID(cfg.PATH.ILSVRC, subset="train")
        sampler = PairSampler([trackingnet, imagenet], cfg=cfg, transform=transform, pairs_per_video=cfg.TRAIN.PAIRS_PER_VIDEO,
                              frame_range=cfg.TRAIN.FRAME_RANGE)
        # Distractor dataset
        coco = CocoDetection(cfg.PATH.COCO, cfg.PATH.COCO_ANN_FILE)
        # coco_distractor = COCODistractor(coco, 4000)
        coco_positive = COCOPositivePair(coco, 4000, cfg=cfg, transform=transform)
        coco_negative = COCONegativePair(coco, 12000, cfg=cfg, transform=transform)

        dataset = ConcatDataset([sampler, coco_positive, coco_negative])
        self.dataloader = DataLoader(dataset, batch_size=cfg.TRAIN.BATCH_SIZE, num_workers=4, shuffle=True,
                                     pin_memory=True, drop_last=True)

        # Validation dataset
        val_trackingnet = TrackingNet(cfg.PATH.TRACKINGNET, subset="val")
        val_imagenet = ImageNetVID(cfg.PATH.ILSVRC, subset="val")
        validation_sampler = PairSampler([val_trackingnet, val_imagenet], cfg=cfg, transform=transform,
                                         pairs_per_video=1, frame_range=cfg.TRAIN.FRAME_RANGE)
        val_coco_positive = COCOPositivePair(coco, 100, cfg=cfg, transform=transform)
        val_dataset = ConcatDataset([validation_sampler, val_coco_positive])

        if cfg.TRAIN.DEBUG_SEQ >= 0:  # When debugging on a single sequence, the validation is performed on the same one
            val_dataset = PairSampler([trackingnet], cfg=cfg, transform=transform, pairs_per_video=200)

        self.validation_dataloader = DataLoader(val_dataset, batch_size=min(cfg.TRAIN.BATCH_SIZE, 20), num_workers=4,
                                                shuffle=True, pin_memory=True, drop_last=False)

        # Loss
        self.criterion = MultiBoxLoss(self.anchors, cfg)

        self.optimizer = optim.Adam(self.net.parameters(), lr=cfg.TRAIN.LR, weight_decay=cfg.TRAIN.WEIGHT_DECAY)
        self.scheduler = optim.lr_scheduler.StepLR(self.optimizer, step_size=cfg.TRAIN.SCHEDULER_STEP_SIZE,
                                                   gamma=cfg.TRAIN.SCHEDULER_GAMMA)

        # Summary Writer
        self.run_id = datetime.now().strftime('%b%d_%H-%M-%S')
        if not cfg.DEBUG:
            self.save_config()
            self.save_code()
            self.writer = SummaryWriter(log_dir=os.path.join(cfg.PATH.DATA_DIR, "runs", self.run_id))

        self.start_epoch = 0

        if cfg.TRAIN.RESUME_CHECKPOINT:
            self.start_epoch = utils.load_checkpoint(cfg.TRAIN.RESUME_CHECKPOINT, self.net, self.optimizer)

        if torch.cuda.is_available():
            self.net = nn.DataParallel(self.net)

        self.best_IOU = 0.
Example #18
def train():
    set_seed(seed=10)
    os.makedirs(args.save_root, exist_ok=True)

    # create model, optimizer and criterion
    model = SSD300(n_classes=len(label_map), device=device)
    biases = []
    not_biases = []
    for name, param in model.named_parameters():
        if param.requires_grad:
            if name.endswith('.bias'):
                biases.append(param)
            else:
                not_biases.append(param)
    model = model.to(device)
    optimizer = torch.optim.SGD(params=[{
        'params': biases,
        'lr': 2 * args.lr
    }, {
        'params': not_biases
    }],
                                lr=args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    if args.resume is None:
        start_epoch = 0
    else:
        checkpoint = torch.load(args.resume, map_location=device)
        start_epoch = checkpoint['epoch'] + 1
        model.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
    print(f'Training will start at epoch {start_epoch}.')

    criterion = MultiBoxLoss(priors_cxcy=model.priors_cxcy,
                             device=device,
                             alpha=args.alpha)
    criterion = criterion.to(device)
    '''
    scheduler = StepLR(optimizer=optimizer,
                       step_size=20,
                       gamma=0.5,
                       last_epoch=start_epoch - 1,
                       verbose=True)
    '''

    # load data
    transform = Transform(size=(300, 300), train=True)
    train_dataset = VOCDataset(root=args.data_root,
                               image_set=args.image_set,
                               transform=transform,
                               keep_difficult=True)
    train_loader = DataLoader(dataset=train_dataset,
                              collate_fn=collate_fn,
                              batch_size=args.batch_size,
                              num_workers=args.num_workers,
                              shuffle=True,
                              pin_memory=True)

    losses = AverageMeter()
    for epoch in range(start_epoch, args.num_epochs):
        # decay learning rate at particular epochs
        if epoch in [120, 140, 160]:
            adjust_learning_rate(optimizer, 0.1)

        # train model
        model.train()
        losses.reset()
        bar = tqdm(train_loader, desc='Train the model')
        for i, (images, bboxes, labels, _) in enumerate(bar):
            images = images.to(device)
            bboxes = [b.to(device) for b in bboxes]
            labels = [l.to(device) for l in labels]

            predicted_bboxes, predicted_scores = model(
                images)  # (N, 8732, 4), (N, 8732, num_classes)
            loss = criterion(predicted_bboxes, predicted_scores, bboxes,
                             labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            losses.update(loss.item(), images.size(0))

            if i % args.print_freq == args.print_freq - 1:
                bar.write(f'Average Loss: {losses.avg:.4f}')

        bar.write(f'Epoch: [{epoch + 1}|{args.num_epochs}] '
                  f'Average Loss: {losses.avg:.4f}')
        # adjust learning rate
        # scheduler.step()

        # save model
        state_dict = {
            'epoch': epoch,
            'model': model.state_dict(),
            'optimizer': optimizer.state_dict()
        }
        save_path = os.path.join(args.save_root, 'ssd300.pth')
        torch.save(state_dict, save_path)

        if epoch % args.save_freq == args.save_freq - 1:
            shutil.copyfile(
                save_path,
                os.path.join(args.save_root, f'ssd300_epochs_{epoch + 1}.pth'))
Example #19
        return boxes, labels, scores

checkpoint_path = gdrive_dir + '/pretrained/mine.pt'
# checkpoint = torch.load(checkpoint_path) # None

if False:  # set to True (and uncomment the load above) to resume from the checkpoint
    ssd_eff = checkpoint['model']
    optimizer = checkpoint['optimizer']
    #exp_lr_scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.96, last_epoch=checkpoint['epoch']-1)
else:
    print('New model')
    ssd_eff = SSDEff(n_classes=15).to(device)
    optimizer = torch.optim.Adam(ssd_eff.parameters(), lr=2e-3, weight_decay=5e-4)
    #exp_lr_scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.96)

loss_func = MultiBoxLoss(priors_cxcy=ssd_eff.get_prior_boxes(), threshold=0.5, alpha=1., neg_pos_ratio=3, focal_loss=False)

grad_clip = None

def train_epoch(model, trainset_loader, loss_func, optimizer, epoch_id):
    model.train()
    train_loss = 0
    for step, (imgs, boxes, labels) in enumerate(trainset_loader):

        # print(type(imgs),imgs.shape, imgs)
        # print(type(boxes),boxes[0].shape, boxes)
        # print(type(labels),labels[0].shape, labels)
        # break
        # move input data to GPU
        imgs = imgs.to(device)
        boxes = [b.to(device) for b in boxes]
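
        # The snippet is truncated here. A typical continuation, following the
        # (predicted_offsets, predicted_scores, boxes, labels) convention used
        # with this style of MultiBoxLoss elsewhere in these examples:
        labels = [l.to(device) for l in labels]
        predicted_offsets, predicted_scores = model(imgs)
        loss = loss_func(predicted_offsets, predicted_scores, boxes, labels)
        optimizer.zero_grad()
        loss.backward()
        if grad_clip is not None:
            torch.nn.utils.clip_grad_norm_(model.parameters(), grad_clip)
        optimizer.step()
        train_loss += loss.item()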