示例#1
0
def main():
    """Train a RetinaNet detector starting from pretrained weights.

    Builds the input transform, restores weights from ./model/net.pth,
    wraps the model in DataParallel across all visible GPUs, and runs
    ``args.train_epoch`` epochs of SGD training, saving a checkpoint
    after every epoch.
    """
    # assert torch.cuda.is_available(), 'Error: CUDA not found!'
    start_epoch = 0  # start from epoch 0 or last epoch
    save_model_path = args.model

    # Data
    print('==> Preparing data..')
    # ImageNet mean/std normalization — presumably matches the backbone's
    # pretraining statistics (TODO confirm against the model weights).
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])

    # Model
    net = RetinaNet()
    net.load_state_dict(torch.load('./model/net.pth'))

    net = torch.nn.DataParallel(net,
                                device_ids=range(torch.cuda.device_count()))
    net.cuda()

    criterion = FocalLoss()
    #     optimizer = optim.Adam(net.parameters(), lr=args.lr, betas=(0.9, 0.999), eps=1e-08, weight_decay=1e-4)
    optimizer = optim.SGD(net.parameters(),
                          lr=args.lr,
                          momentum=0.9,
                          weight_decay=1e-4)

    # Train and checkpoint once per epoch (epochs are 1-indexed here).
    for epoch in range(start_epoch, start_epoch + args.train_epoch):
        train(epoch + 1, transform, net, optimizer, criterion)
        save_model(epoch + 1, save_model_path, net, optimizer)
示例#2
0
def main():
    """Evaluate a saved RetinaNet checkpoint on the test set.

    Restores model and optimizer state from the checkpoint written at
    ``args.load_model_epoch`` and runs a single test pass.
    """
    # Fixed typo in log message: 'chooseing' -> 'choosing'.
    print('==> choosing data..')
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])
    # Model
    net = RetinaNet()
    criterion = FocalLoss()
    #     optimizer = optim.Adam(net.parameters(), lr=0.001, betas=(0.9, 0.999), eps=1e-08, weight_decay=1e-4)
    optimizer = optim.SGD(net.parameters(),
                          lr=0.001,
                          momentum=0.9,
                          weight_decay=1e-4)

    load_model_epoch = args.load_model_epoch
    checkpoint = torch.load(
        './checkpoint/{}_ckpt.pth'.format(load_model_epoch))  # max_epoch
    # Wrap in DataParallel BEFORE loading the state dict — the checkpoint's
    # keys presumably carry the 'module.' prefix (saved from a wrapped net);
    # verify against the training-side save code.
    net = torch.nn.DataParallel(net,
                                device_ids=range(torch.cuda.device_count()))
    net.cuda()
    net.load_state_dict(checkpoint['net'])

    optimizer.load_state_dict(checkpoint['optimizer'])
    start_epoch = checkpoint['epoch']

    test(start_epoch, transform, net, criterion, optimizer)
示例#3
0
文件: train_0.py 项目: chicm/detect
def run_train(args):
    """Fine-tune RetinaNet with Adam, checkpointing every ``iter_save`` batches.

    Resumes weights from ./ckps/best_0.pth and rotates saved checkpoints
    across five slots (best_0 .. best_4) so a crash never loses more than
    one save interval of progress.

    Args:
        args: parsed CLI namespace; only ``args.lr`` is read here.
    """
    assert torch.cuda.is_available(), 'Error: CUDA not found!'
    start_epoch = 0  # start from epoch 0 or last epoch

    # Data
    print('==> Preparing data..')

    trainloader = get_train_loader(img_dir=settings.IMG_DIR,
                                   batch_size=batch_size)
    #trainloader = get_small_train_loader()
    print(trainloader.num)
    #testloader = get_train_loader(img_dir=settings.IMG_DIR)

    # Model
    net = RetinaNet()
    #net.load_state_dict(torch.load('./model/net.pth'))
    net.load_state_dict(torch.load('./ckps/best_0.pth'))
    net = torch.nn.DataParallel(net,
                                device_ids=range(torch.cuda.device_count()))
    net.cuda()

    criterion = FocalLoss()
    #optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=0.9, weight_decay=1e-4)
    optimizer = optim.Adam(net.parameters(), lr=args.lr)

    iter_save = 200  # checkpoint / log frequency, in batches
    bgtime = time.time()
    # Training
    for epoch in range(start_epoch, start_epoch + 100):
        print('\nEpoch: %d' % epoch)
        net.train()
        #net.module.freeze_bn()
        for batch_idx, (inputs, loc_targets,
                        cls_targets) in enumerate(trainloader):
            inputs = Variable(inputs.cuda())
            loc_targets = Variable(loc_targets.cuda())
            cls_targets = Variable(cls_targets.cuda())

            optimizer.zero_grad()
            loc_preds, cls_preds = net(inputs)
            loss = criterion(loc_preds, loc_targets, cls_preds, cls_targets)
            loss.backward()
            optimizer.step()

            sample_num = (batch_idx + 1) * batch_size
            avg_loss = running_loss(loss.data[0])
            print(
                'Epoch: {}, num: {}/{} train_loss: {:.3f} | run_loss: {:.3f} min: {:.1f}'
                .format(epoch, sample_num, trainloader.num, loss.data[0],
                        avg_loss, (time.time() - bgtime) / 60),
                end='\r')

            if batch_idx % iter_save == 0:
                # Save the unwrapped module so the weights load without the
                # DataParallel 'module.' key prefix; rotate over 5 slots.
                torch.save(
                    net.module.state_dict(),
                    './ckps/best_{}.pth'.format(batch_idx // iter_save % 5))
                log.info('batch: {}, loss: {:.4f}'.format(batch_idx, avg_loss))
def load_model(backbone):
    """Restore a multi-GPU RetinaNet detector from the saved checkpoint.

    Args:
        backbone: backbone identifier forwarded to the RetinaNet constructor.

    Returns:
        The DataParallel-wrapped network on GPU with weights loaded.
    """
    print('loading model...')
    checkpoint = torch.load(os.path.join('model', 'restnet101_8K.pth'))
    detector = RetinaNet(backbone=backbone, num_classes=len(cfg.classes))
    detector = torch.nn.DataParallel(
        detector, device_ids=range(torch.cuda.device_count()))
    detector.cuda()
    cudnn.benchmark = True  # let cuDNN pick the fastest conv algorithms
    detector.load_state_dict(checkpoint['net'])
    return detector
示例#5
0
def train(total_epochs=1, interval=100, resume=False, ckpt_path = ''):
    """Train RetinaNet on the OpenImages dataset with optional resume.

    Args:
        total_epochs: number of epochs to run (absolute, not additional).
        interval: logging interval forwarded to ``train_one_epoch``.
        resume: when True, restore model/optimizer/epoch from ``ckpt_path``.
        ckpt_path: path to a checkpoint produced by ``save_checkpoint``.
    """
    print("Loading training dataset...")
    train_dset = OpenImagesDataset(root='./data/train',
                            list_file ='./data/tmp/train_images_bbox.csv',
                            transform=transform, train=True, input_size=600)

    train_loader = data.DataLoader(train_dset, batch_size=4, shuffle=True, num_workers=4, collate_fn=train_dset.collate_fn)

    print("Loading completed.")

    #val_dset = OpenImagesDataset(root='./data/train',
    #                  list_file='./data/tmp/train_images_bbox.csv', train=False, transform=transform, input_size=600)
    #val_loader = torch.utils.data.DataLoader(val_dset, batch_size=1, shuffle=False, num_workers=4, collate_fn=val_dset.collate_fn)

    net = RetinaNet()
    # Start from the pretrained weights; may be overwritten below on resume.
    net.load_state_dict(torch.load('./model/net.pth'))

    criterion = FocalLoss()

    net.cuda()
    criterion.cuda()
    optimizer = optim.SGD(net.parameters(), lr=1e-3, momentum=0.9, weight_decay=1e-4)
    best_val_loss = 1000  # sentinel; replaced from checkpoint on resume

    start_epoch=0

    if resume:
        if os.path.isfile(ckpt_path):
            print(f'Loading from the checkpoint {ckpt_path}')
            checkpoint = torch.load(ckpt_path)
            start_epoch = checkpoint['epoch']
            best_val_loss = checkpoint['best_val_loss']
            net.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print(f'Loaded checkpoint {ckpt_path}, epoch : {start_epoch}')
        else:
            # Missing checkpoint is non-fatal: fall back to fresh training.
            print(f'No check point found at the path {ckpt_path}')



    for epoch in range(start_epoch, total_epochs):
        train_one_epoch(train_loader, net, criterion, optimizer, epoch, interval)
        # Validation is disabled; val_loss stays 0, so a checkpoint is
        # written on the first epoch and whenever 0 < best_val_loss holds.
        val_loss = 0
        #val_loss = validate(val_loader, net, criterion, interval)

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            save_checkpoint({
                'epoch': epoch+1,
                'state_dict': net.state_dict(),
                'best_val_loss': best_val_loss,
                'optimizer' : optimizer.state_dict()
            }, is_best=True)
示例#6
0
def test_retinanet():
    """Smoke-test RetinaNet: time one forward pass on a random 500x500 input."""
    net = RetinaNet(classes=80)
    # Move the model to GPU BEFORE starting the timer so the transfer
    # cost is not counted as inference time.
    net.cuda()
    # Bug fix: the input must live on the same device as the model,
    # otherwise the forward pass raises a device-mismatch error.
    # volatile=True disables autograd bookkeeping (legacy pre-0.4 API).
    x = Variable(torch.rand(1, 3, 500, 500).cuda(), volatile=True)

    now = time.time()
    predictions = net(x)
    later = time.time()

    print(later - now)

    for prediction in predictions:
        print(prediction.size())
示例#7
0
def evaluate_threshold(img_ids, cls_threshold, bbox_dict):
    """Compare predicted vs. ground-truth box counts at a class threshold.

    Runs the checkpointed RetinaNet over ``img_ids`` with the decoder's
    class threshold set to ``cls_threshold`` and prints the running tally
    of predicted and true object counts.

    Args:
        img_ids: image ids to evaluate.
        cls_threshold: score threshold applied by the decoder.
        bbox_dict: ground-truth boxes keyed by image id.
    """
    loader = get_test_loader(img_ids,
                             img_dir=settings.IMG_DIR,
                             batch_size=batch_size)

    # Model
    net = RetinaNet()
    net.load_state_dict(torch.load(CKP_FILE))
    net.cuda()
    net.eval()

    t_start = time.time()
    encoder = DataEncoder()
    encoder.class_threshold = cls_threshold
    true_objects_num = 0
    pred_objects_num = 0

    for step, batch in enumerate(loader):
        batch = Variable(batch.cuda())
        loc_preds, cls_preds = net(batch)

        # Count predictions that survive decoding at this threshold.
        for j in range(len(loc_preds)):
            boxes, labels, scores = encoder.decode(
                loc_preds[j].data, cls_preds[j].data,
                (settings.IMG_SZ, settings.IMG_SZ))
            pred_objects_num += len(scores)

        # Count ground-truth boxes for the images in this batch.
        for offset in range(len(batch)):
            img_id = loader.img_ids[step * batch_size + offset]
            if img_id in bbox_dict:
                true_objects_num += len(bbox_dict[img_id])

        print('{} / {}, {} / {}, {:.4f},  {:.2f} min'.format(
            batch_size * (step + 1), loader.num, pred_objects_num,
            true_objects_num, cls_threshold, (time.time() - t_start) / 60),
              end='\r')

    print('\n')
    print('=>>> {}/{}, {}, {:.4f}\n'.format(
        pred_objects_num, true_objects_num,
        pred_objects_num - true_objects_num, cls_threshold))
示例#8
0
def predict():
    """Run inference over the test set and write a submission CSV (sub7.csv)."""
    assert torch.cuda.is_available(), 'Error: CUDA not found!'
    print('==> Preparing data..')

    loader = get_test_loader(get_test_ids(),
                             img_dir=settings.TEST_IMG_DIR,
                             batch_size=batch_size)
    print(loader.num)

    # Model
    net = RetinaNet()
    net.load_state_dict(torch.load(CKP_FILE))
    net.cuda()
    net.eval()

    started = time.time()
    encoder = DataEncoder()
    prediction_strings = []
    for step, batch in enumerate(loader):
        batch = Variable(batch.cuda())

        loc_preds, cls_preds = net(batch)
        print('{} / {}  {:.2f}'.format(batch_size * (step + 1),
                                       loader.num,
                                       (time.time() - started) / 60),
              end='\r')
        # Decode each image in the batch into one prediction string.
        for j in range(len(loc_preds)):
            boxes, labels, scores = encoder.decode(
                loc_preds[j].data, cls_preds[j].data,
                (settings.IMG_SZ, settings.IMG_SZ))
            prediction_strings.append(
                _get_prediction_string(boxes, labels, scores))
    print(len(prediction_strings))
    print(prediction_strings[:3])
    submission = pd.DataFrame({
        'ImageId': loader.img_ids,
        'PredictionString': prediction_strings
    })
    submission.to_csv('sub7.csv', index=False)
示例#9
0
文件: main.py 项目: nicoledjy/dl_proj
    def train_obj():
        """Train the object-detection model with SGD and polynomial LR decay."""
        # Model
        detector = RetinaNet()
        detector = torch.nn.DataParallel(
            detector, device_ids=range(torch.cuda.device_count()))
        detector.cuda()

        loss_fn = FocalLoss()
        sgd = optim.SGD(detector.parameters(),
                        lr=args.lr,
                        momentum=0.9,
                        weight_decay=1e-4)

        # Polynomial decay (power 0.9) over the full training schedule,
        # stepped per batch: len(trainloader) * epochs total steps.
        poly_decay = torch.optim.lr_scheduler.LambdaLR(
            sgd,
            lr_lambda=lambda x: (1 - x / (len(trainloader) * epochs))**0.9)

        obj_trainer = ObjDetTrainer(detector, loss_fn, sgd, poly_decay,
                                    trainloader, valloader, device)
        obj_trainer.train(epochs, True)
示例#10
0
                                         shuffle=False,
                                         num_workers=8,
                                         collate_fn=testset.collate_fn)

# Model
# 15-class detector, seeded from the pretrained DOTA weights.
net = RetinaNet(num_classes=15)
net.load_state_dict(torch.load('./model/dota_15c_9ma.pth'))
if resume:
    print('==> Resuming from checkpoint..')
    # Checkpoint also carries the best loss and last epoch for resuming.
    checkpoint = torch.load('./checkpoint/ckpt.pth')
    net.load_state_dict(checkpoint['net'])
    best_loss = checkpoint['loss']
    start_epoch = checkpoint['epoch']

net = torch.nn.DataParallel(net, device_ids=range(torch.cuda.device_count()))
net.cuda()

# Freeze the FPN stem and residual layers so only the remaining parts of
# the network receive gradient updates when fix == 'head'.
if fix == 'head':
    for param in net.module.fpn.conv1.parameters():
        param.requires_grad = False
    for param in net.module.fpn.bn1.parameters():
        param.requires_grad = False
    for param in net.module.fpn.layer1.parameters():
        param.requires_grad = False
    for param in net.module.fpn.layer2.parameters():
        param.requires_grad = False
    for param in net.module.fpn.layer3.parameters():
        param.requires_grad = False
    for param in net.module.fpn.layer4.parameters():
        param.requires_grad = False
else:
示例#11
0
文件: eval.py 项目: Dev2022/RetinaNet
    transform=transform,
    input_size=512)

testloader = torch.utils.data.DataLoader(testset,
                                         batch_size=batch_sizes,
                                         shuffle=False,
                                         num_workers=1,
                                         collate_fn=testset.collate_fn)

encoder = DataEncoder()

print('Loading model..')
net = RetinaNet()
net.load_state_dict(torch.load('checkpoint/ckpt.pth')['net'])
net.eval()
net = net.cuda()

# Class names read from the VOC names file, one per line.
the_classes = [c.strip() for c in open('data/voc.names').readlines()]
# Evaluation image size — matches input_size=512 passed to the dataset.
w = h = 512


def test():
    # NOTE(review): this definition continues beyond the visible chunk.
    the_det_file = {}
    forward_tlist = []
    _t = {'im_detect': Timer()}

    for batch_idx, (inputs, loc_targets, cls_targets,
                    fname) in enumerate(testloader):
        _t['im_detect'].tic()
        inputs = inputs.cuda()
示例#12
0
    print('==> Resuming from checkpoint..')
    checkpoint = torch.load(
        '/media/Darius/shayeree/mixed_precision/training/checkpoint_list_wise/albertsons/Retina50ProdB1/ckpt_0010_17696.4766.pth'
    )
    net.load_state_dict(checkpoint['net'])
    best_loss = checkpoint['loss']
    start_epoch = checkpoint['epoch']
else:
    # Fresh start: seed net.fpn with the pretrained backbone weights,
    # keeping only parameters whose names exist in the current model.
    params_dict = torch.load('./model/{:s}.pth'.format(args.net))
    net_dict = net.fpn.state_dict()
    params_dict = {k: v for k, v in params_dict.items() if k in net_dict}
    net_dict.update(params_dict)
    net.fpn.load_state_dict(net_dict)
#net,optimizer=amp.initialize(net,optimizer,opt_level='01',loss_scale="dynamic")
#net = torch.nn.DataParallel(net, device_ids=range(torch.cuda.device_count()))
net.cuda()

# Single-class detector trained with focal loss.
criterion = FocalLoss(num_classes=1)
optimizer = optim.SGD(net.parameters(),
                      lr=args.lr,
                      momentum=0.9,
                      weight_decay=1e-4)
#amp_handle = amp.init(enabled=True, verbose=True)
#optimizer = amp_handle.wrap_optimizer(optimizer)
# Mixed-precision setup (presumably NVIDIA apex AMP — verify import) at O1
# with dynamic loss scaling.
opt_level = 'O1'
net, optimizer = amp.initialize(net.cuda(),
                                optimizer,
                                opt_level=opt_level,
                                loss_scale="dynamic")

#net,optimizer=amp.initialize(net.cuda(),optimizer,opt_level='O1',loss_scale="dynamic")
示例#13
0
def train():
    """End-to-end text-detection training loop with TensorBoard logging.

    Builds the dataset/loader from CLI args, restores pretrained weights
    (focal-loss or OHEM variant), trains with SGD and an iteration-indexed
    step LR schedule, logs losses and decoded detections to TensorBoard,
    checkpoints periodically, and optionally spawns an asynchronous
    eval.py subprocess whose h-mean score is read back when it finishes.
    """
    args = parse_args()

    assert torch.cuda.is_available(), 'Error: CUDA not found!'
    assert args.focal_loss, "OHEM + ce_loss is not working... :("

    if not os.path.exists(args.save_folder):
        os.mkdir(args.save_folder)

    if not os.path.exists(args.logdir):
        os.mkdir(args.logdir)

    ###########################################################################
    # Data
    ###########################################################################

    print('==> Preparing data..')
    trainset = ListDataset(root='/mnt/9C5E1A4D5E1A2116/datasets/',
                           dataset=args.dataset,
                           train=True,
                           transform=Augmentation_traininig,
                           input_size=args.input_size,
                           multi_scale=args.multi_scale)
    trainloader = torch.utils.data.DataLoader(trainset,
                                              batch_size=args.batch_size,
                                              shuffle=True,
                                              num_workers=args.num_workers,
                                              collate_fn=trainset.collate_fn)

    ###########################################################################

    # Training Detail option\
    # LR-decay milestones in iterations; SynthText gets the longer schedule.
    stepvalues = (10000, 20000, 30000, 40000, 50000) if args.dataset in ["SynthText"] \
        else (2000, 4000, 6000, 8000, 10000)
    best_loss = float('inf')  # best test loss
    start_epoch = 0  # start from epoch 0 or last epoch
    iteration = 0
    cur_lr = args.lr
    # Normalization stats used to un-normalize images for TensorBoard.
    mean = (0.485, 0.456, 0.406)
    var = (0.229, 0.224, 0.225)
    step_index = 0
    pEval = None  # handle of the async evaluation subprocess, if running

    ###########################################################################
    # Model
    ###########################################################################

    # set model (focal_loss vs OHEM_CE loss)
    if args.focal_loss:
        imagenet_pretrain = 'weights/retinanet_se50.pth'
        criterion = FocalLoss()
        num_classes = 1
    else:
        imagenet_pretrain = 'weights/retinanet_se50_OHEM.pth'
        criterion = OHEM_loss()
        num_classes = 2

    net = RetinaNet(num_classes)

    # Restore model weights
    net.load_state_dict(torch.load(imagenet_pretrain))

    if args.resume:
        print('==> Resuming from checkpoint..', args.resume)
        checkpoint = torch.load(args.resume)
        net.load_state_dict(checkpoint['net'])
        #start_epoch = checkpoint['epoch']
        #iteration = checkpoint['iteration']
        #cur_lr = checkpoint['lr']
        #step_index = checkpoint['step_index']
        # optimizer.load_state_dict(state["optimizer"])

    print("multi_scale : ", args.multi_scale)
    print("input_size : ", args.input_size)
    print("stepvalues : ", stepvalues)
    print("start_epoch : ", start_epoch)
    print("iteration : ", iteration)
    print("cur_lr : ", cur_lr)
    print("step_index : ", step_index)
    print("num_gpus : ", torch.cuda.device_count())

    # Data parellelism for multi-gpu training
    net = torch.nn.DataParallel(net,
                                device_ids=range(torch.cuda.device_count()))
    net.cuda()

    # Put model in training mode and freeze batch norm.
    net.train()
    net.module.freeze_bn()  # you must freeze batchnorm

    ###########################################################################
    # Optimizer
    ###########################################################################

    optimizer = optim.SGD(net.parameters(),
                          lr=cur_lr,
                          momentum=0.9,
                          weight_decay=1e-4)
    #optimizer = optim.Adam(net.parameters(), lr=cur_lr)

    ###########################################################################
    # Utils
    ###########################################################################

    encoder = DataEncoder()
    writer = SummaryWriter(log_dir=args.logdir)

    ###########################################################################
    # Training loop
    ###########################################################################

    t0 = time.time()
    # Epoch bound is nominal (10000); the real stop is args.max_iter below.
    for epoch in range(start_epoch, 10000):
        if iteration > args.max_iter:
            break

        for inputs, loc_targets, cls_targets in trainloader:
            inputs = Variable(inputs.cuda())
            loc_targets = Variable(loc_targets.cuda())
            cls_targets = Variable(cls_targets.cuda())

            optimizer.zero_grad()
            loc_preds, cls_preds = net(inputs)

            loc_loss, cls_loss = criterion(loc_preds, loc_targets, cls_preds,
                                           cls_targets)
            loss = loc_loss + cls_loss
            loss.backward()
            optimizer.step()

            # Periodic console + TensorBoard logging.
            if iteration % 20 == 0:
                t1 = time.time()

                print(
                    'iter ' + repr(iteration) + ' (epoch ' + repr(epoch) +
                    ') || loss: %.4f || l loc_loss: %.4f || l cls_loss: %.4f (Time : %.1f)'
                    % (loss.sum().item(), loc_loss.sum().item(),
                       cls_loss.sum().item(), (t1 - t0)))
                # t0 = time.time()

                writer.add_scalar('loc_loss', loc_loss.sum().item(), iteration)
                writer.add_scalar('cls_loss', cls_loss.sum().item(), iteration)
                writer.add_scalar('loss', loss.sum().item(), iteration)

                # show inference image in tensorboard
                # Un-normalize the first image of the batch back to uint8.
                infer_img = np.transpose(inputs[0].cpu().numpy(), (1, 2, 0))
                infer_img *= var
                infer_img += mean
                infer_img *= 255.
                infer_img = np.clip(infer_img, 0, 255)
                infer_img = infer_img.astype(np.uint8)
                h, w, _ = infer_img.shape

                boxes, labels, scores = encoder.decode(loc_preds[0],
                                                       cls_preds[0], (w, h))
                boxes = boxes.reshape(-1, 4, 2).astype(np.int32)

                if boxes.shape[0] != 0:
                    # infer_img = infer_img/np.float32(255)

                    # print(boxes)
                    # print(
                    #     f"infer_img prior to cv2.polylines - dtype: {infer_img.dtype}, shape: {infer_img.shape}, min: {infer_img.min()}, max: {infer_img.max()}")
                    # print(
                    #     f"boxes prior to cv2.polylines - dtype: {boxes.dtype}, shape: {boxes.shape}, min: {boxes.min()}, max: {boxes.max()}")
                    infer_img = cv2.polylines(infer_img.copy(), boxes, True,
                                              (0, 255, 0), 4)

                # print(
                #     f"infer_img - dtype: {infer_img.dtype}, shape: {infer_img.shape}, min: {infer_img.min()}, max: {infer_img.max()}")

                writer.add_image('image',
                                 infer_img,
                                 iteration,
                                 dataformats="HWC")
                writer.add_scalar('input_size', h, iteration)
                writer.add_scalar('learning_rate', cur_lr, iteration)

                t0 = time.time()

            # Periodic checkpointing of the unwrapped module + optimizer.
            if iteration % args.save_interval == 0 and iteration > 0:
                print('Saving state, iter : ', iteration)
                state = {
                    'net': net.module.state_dict(),
                    "optimizer": optimizer.state_dict(),
                    'iteration': iteration,
                    'epoch': epoch,
                    'lr': cur_lr,
                    'step_index': step_index
                }
                model_file = args.save_folder + \
                    'ckpt_' + repr(iteration) + '.pth'
                torch.save(state, model_file)

            # Step LR decay at the scheduled iteration milestones.
            if iteration in stepvalues:
                step_index += 1
                cur_lr = adjust_learning_rate(cur_lr, optimizer, args.gamma,
                                              step_index)

            if iteration > args.max_iter:
                break

            # Launch (or harvest) one asynchronous evaluation subprocess.
            if args.evaluation and iteration % args.eval_step == 0:
                try:
                    if pEval is None:
                        print("Evaluation started at iteration {} on IC15...".
                              format(iteration))
                        eval_cmd = "CUDA_VISIBLE_DEVICES=" + str(args.eval_device) + \
                            " python eval.py" + \
                            " --tune_from=" + args.save_folder + 'ckpt_' + repr(iteration) + '.pth' + \
                            " --input_size=1024" + \
                            " --output_zip=result_temp1"

                        pEval = Popen(eval_cmd,
                                      shell=True,
                                      stdout=PIPE,
                                      stderr=PIPE)

                    elif pEval.poll() is not None:
                        # Subprocess finished: parse the h-mean out of its
                        # stdout (brittle string split on ':' and ',').
                        (scorestring, stderrdata) = pEval.communicate()

                        hmean = float(
                            str(scorestring).strip().split(":")[3].split(",")
                            [0].split("}")[0].strip())

                        writer.add_scalar('test_hmean', hmean, iteration)

                        print("test_hmean for {}-th iter : {:.4f}".format(
                            iteration, hmean))

                        if pEval is not None:
                            pEval.kill()
                        pEval = None

                except Exception as e:
                    print("exception happened in evaluation ", e)
                    if pEval is not None:
                        pEval.kill()
                    pEval = None

            iteration += 1
示例#14
0
# Test split (train=False) from the VOC'12 validation list, resized to 600.
testset = ListDataset(root='/search/odin/liukuang/data/voc_all_images',
                      list_file='./data/voc12_val.txt', train=False, transform=transform, input_size=600)
testloader = torch.utils.data.DataLoader(testset, batch_size=16, shuffle=False, num_workers=8, collate_fn=testset.collate_fn)

# Model
net = RetinaNet()
net.load_state_dict(torch.load('./model/net.pth'))
if args.resume:
    print('==> Resuming from checkpoint..')
    # Checkpoint also carries the best loss and last epoch for resuming.
    checkpoint = torch.load('./checkpoint/ckpt.pth')
    net.load_state_dict(checkpoint['net'])
    best_loss = checkpoint['loss']
    start_epoch = checkpoint['epoch']

net = torch.nn.DataParallel(net, device_ids=range(torch.cuda.device_count()))
net.cuda()

criterion = FocalLoss()
optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=0.9, weight_decay=1e-4)

# Training
def train(epoch):
    # NOTE(review): this definition continues beyond the visible chunk.
    print('\nEpoch: %d' % epoch)
    net.train()
    net.module.freeze_bn()  # keep BatchNorm statistics fixed while training
    train_loss = 0
    for batch_idx, (inputs, loc_targets, cls_targets) in enumerate(trainloader):
        inputs = Variable(inputs.cuda())
        loc_targets = Variable(loc_targets.cuda())
        cls_targets = Variable(cls_targets.cuda())