Example #1
def train(optimizer, num_classes, num_epochs, scheduler, device):
    load = get_dataset()
    model = get_model_instance_segmentation(num_classes)
    model = model.to(device)

    if optimizer == 'Adam':
        exp_optimizer = optim.Adam(model.parameters(), lr=1e-3)
    else:
        exp_optimizer = optim.SGD(model.parameters(),
                                  lr=0.005,
                                  momentum=0.9,
                                  weight_decay=0.0005)

    if scheduler:
        lr_scheduler = optim.lr_scheduler.StepLR(exp_optimizer,
                                                 step_size=3,
                                                 gamma=0.1)

    for epoch in range(num_epochs):
        train_one_epoch(model,
                        exp_optimizer,
                        load['train'],
                        device,
                        epoch,
                        print_freq=10)
        if scheduler:
            lr_scheduler.step()
        evaluate(model, load['val'], device=device)

    torch.save(model.state_dict(), 'best_model')

    print('Finished')
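
Both this example and the next one assume a get_model_instance_segmentation helper. A minimal sketch of that helper, following the torchvision instance segmentation tutorial (the exact version used by these projects may differ):

import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor

def get_model_instance_segmentation(num_classes):
    # start from a Mask R-CNN pretrained on COCO
    model = torchvision.models.detection.maskrcnn_resnet50_fpn(pretrained=True)
    # replace the box predictor head with one sized for num_classes
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    # replace the mask predictor head as well
    in_features_mask = model.roi_heads.mask_predictor.conv5_mask.in_channels
    model.roi_heads.mask_predictor = MaskRCNNPredictor(in_features_mask, 256, num_classes)
    return model
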
Example #2
def train(data_root):

    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    # background and person
    num_classes = 2
    dataset = PennFudanDataset(data_root, get_transform(train=True))
    dataset_test = PennFudanDataset(data_root, get_transform(train=False))

    # split the dataset
    indices = torch.randperm(len(dataset)).tolist()
    dataset = Subset(dataset, indices[:-50])
    dataset_test = Subset(dataset_test, indices[-50:])

    # define data loaders
    data_loader = DataLoader(dataset, batch_size=2, shuffle=True, num_workers=4,
                             collate_fn=tools.collate_fn)
    data_loader_test = DataLoader(dataset_test, batch_size=1, shuffle=False, num_workers=4,
                                  collate_fn=tools.collate_fn)

    # get model
    model = get_model_instance_segmentation(num_classes)
    model.to(device)

    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3)

    num_epochs = 10
    for epoch in range(num_epochs):
        train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq=10)
        lr_scheduler.step()
        # evaluate(model,data_loader_test,device=device)
    torch.save(model.state_dict(), "masknet.pth")
    print("OK!")
Example #3
    'loss_bb_regression_max', 'loss_bb_regression_min', 'loss_classifier_avg',
    'loss_classifier_median', 'loss_classifier_max', 'loss_classifier_min',
    'loss_rpn_bb_regression_avg', 'loss_rpn_bb_regression_median',
    'loss_rpn_bb_regression_max', 'loss_rpn_bb_regression_min'
]

train_epochs_log = pd.DataFrame(columns=columns_epochs)
train_iterations_log = pd.DataFrame(columns=columns_iterations)

# Train the network (saving the best model)
for epoch in range(0, epochs):
    # train for one epoch, printing every <print_freq> iterations
    training_results, train_iterations_log = train_one_epoch(
        model,
        optimizer,
        loader_train,
        device,
        epoch,
        print_freq=1,
        df=train_iterations_log)

    # add epoch logs to df
    train_epochs_log = helper.df_add_epoch_log(train_epochs_log, epoch,
                                               training_results)

    # evaluate on the validation data set
    mAP = evaluate(model, loader_validation, device=device)

    # Check to keep best model
    if mAP > best_mAP:
        best_mAP = mAP
        # Save model
Example #4
    # Training loop
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(params,
                                lr=0.005,
                                momentum=0.9,
                                weight_decay=0.0005)
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                   step_size=3,
                                                   gamma=0.1)
    num_epochs = 10
    for e in range(num_epochs):
        # train for one epoch
        train_one_epoch(model,
                        optimizer,
                        train_loader,
                        device,
                        e,
                        print_freq=10)
        # update learning rate
        lr_scheduler.step()
        # evaluate on the validation dataset
        print('entering eval')
        print(len(val_loader))
        evaluate(model, val_loader, device=device)
        # save a checkpoint every 10 epochs (with num_epochs = 10, this fires only after epoch 0)
        if e % 10 == 0:
            torch.save({
                'epoch': e,
                'model_state_dict': model.state_dict()
            }, f'leaf_od{e}EPOCH_checkpoint.pt')
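
The checkpoint above stores only the epoch number and the model weights, so resuming or running inference later is just a matter of loading the state dict back. A minimal sketch, assuming the same model object and the epoch-0 file name produced by the loop above:

checkpoint = torch.load('leaf_od0EPOCH_checkpoint.pt', map_location=device)
model.load_state_dict(checkpoint['model_state_dict'])
start_epoch = checkpoint['epoch'] + 1  # resume training here, or call model.eval() for inference
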
Example #5
def main(args):
    utils.init_distributed_mode(args)
    print(args)

    device = torch.device(args.device)

    # Data loading code
    print("Loading data")

    dataset, num_classes = get_dataset(args.dataset, "train",
                                       get_transform(train=True),
                                       args.data_path)
    dataset_test, _ = get_dataset(args.dataset, "val",
                                  get_transform(train=False), args.data_path)

    print("Creating data loaders")
    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(
            dataset)
        test_sampler = torch.utils.data.distributed.DistributedSampler(
            dataset_test)
    else:
        train_sampler = torch.utils.data.RandomSampler(dataset)
        test_sampler = torch.utils.data.SequentialSampler(dataset_test)

    if args.aspect_ratio_group_factor >= 0:
        group_ids = create_aspect_ratio_groups(
            dataset, k=args.aspect_ratio_group_factor)
        train_batch_sampler = GroupedBatchSampler(train_sampler, group_ids,
                                                  args.batch_size)
    else:
        train_batch_sampler = torch.utils.data.BatchSampler(train_sampler,
                                                            args.batch_size,
                                                            drop_last=True)

    data_loader = torch.utils.data.DataLoader(
        dataset,
        batch_sampler=train_batch_sampler,
        num_workers=args.workers,
        collate_fn=utils.collate_fn)

    data_loader_test = torch.utils.data.DataLoader(dataset_test,
                                                   batch_size=1,
                                                   sampler=test_sampler,
                                                   num_workers=args.workers,
                                                   collate_fn=utils.collate_fn)

    print("Creating model")
    # model = torchvision.models.detection.__dict__[args.model](num_classes=num_classes,
    #                                                          pretrained=args.pretrained)
    model = get_model(num_classes=num_classes)
    model.to(device)

    model_without_ddp = model
    if args.distributed:
        model = torch.nn.parallel.DistributedDataParallel(
            model, device_ids=[args.gpu])
        model_without_ddp = model.module

    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(params,
                                lr=args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=args.lr_step_size, gamma=args.lr_gamma)
    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer, milestones=args.lr_steps, gamma=args.lr_gamma)

    if args.resume:
        print("----------------------Resume--------------")
        checkpoint = torch.load(args.resume, map_location='cpu')
        model_without_ddp.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])
        args.start_epoch = checkpoint['epoch'] + 1

    if args.test_only:
        evaluate(model, data_loader_test, device=device)
        return

    print("Start training")
    start_time = time.time()
    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            train_sampler.set_epoch(epoch)
        train_one_epoch(model, optimizer, data_loader, device, epoch,
                        args.print_freq)
        lr_scheduler.step()
        if args.output_dir:
            utils.save_on_master(
                {
                    'model': model_without_ddp.state_dict(),
                    'optimizer': optimizer.state_dict(),
                    'lr_scheduler': lr_scheduler.state_dict(),
                    'args': args,
                    'epoch': epoch
                }, os.path.join(args.output_dir, 'model_{}.pth'.format(epoch)))

        # evaluate after every epoch
        evaluate(model, data_loader_test, device=device)

    total_time = time.time() - start_time
    total_time_str = str(datetime.timedelta(seconds=int(total_time)))
    print('Training time {}'.format(total_time_str))
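
Checkpoints in this example go through utils.save_on_master rather than torch.save directly, so that in distributed runs only one process writes each file. A minimal sketch of that idea (the torchvision reference utilities implement it in essentially this form, with some extra bookkeeping):

import torch
import torch.distributed as dist

def is_main_process():
    # rank 0, or any non-distributed run, handles all file I/O
    return not dist.is_available() or not dist.is_initialized() or dist.get_rank() == 0

def save_on_master(*args, **kwargs):
    # avoid several DDP workers writing the same checkpoint at once
    if is_main_process():
        torch.save(*args, **kwargs)
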
Example #6
def main(args):
    input_size = (224, 224)
    best_acc = 0.0

    # prepare output folder
    if args.output_dir:
        if not Path(args.output_dir).is_dir():
            Path(args.output_dir).mkdir()

    # read config
    with open(args.cfg, 'r') as f:
        cfg_dict = yaml.load(f, Loader=yaml.FullLoader)
    config_stem = Path(args.cfg).stem
    hyp = cfg_dict['hyp']
    data = cfg_dict['data']
    # np.unique sorts the class names, as sklearn.preprocessing.LabelEncoder.fit_transform() does
    names = np.unique(data['names'])

    # set device mode
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    # create model
    model_name = args.model
    nc = data['nc']
    feature_extract = hyp['feature_extract']
    print('[INFO] Creating model ({})'.format(model_name))
    model, input_size = initialize_model(model_name, nc, feature_extract)
    model.to(device)

    # load data
    print('[INFO] Loading data')
    train_csv = data['train']
    val_csv = data['val']
    train_dataset, val_dataset, train_sampler = load_data_from_csv(
        train_csv, val_csv, input_size, args.transform)

    # dataloader
    batch_size = hyp['batch_size']
    train_loader = DataLoader(train_dataset,
                              batch_size=batch_size,
                              sampler=train_sampler,
                              num_workers=args.workers)
    val_loader = DataLoader(val_dataset,
                            batch_size=batch_size,
                            shuffle=False,
                            num_workers=args.workers)

    # criterion + optimizer + scheduler
    learning_rate = hyp['lr']
    momentum = hyp['momentum']
    weight_decay = hyp['weight_decay']
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(),
                          lr=learning_rate,
                          momentum=momentum,
                          weight_decay=weight_decay)
    # scheduler = optim.lr_scheduler.MultiStepLR(optimizer,milestones=[0.5*args.total_epochs, 0.8*args.total_epochs], gamma=0.1)

    # create tensorboard writter
    logdir = f'runs/{model_name}_{config_stem}'
    writter = SummaryWriter(log_dir=logdir)

    if args.resume:
        print('[INFO] Load checkpoint')
        ckpt = torch.load(args.resume, map_location=device)
        model.load_state_dict(ckpt['model_state_dict'])
        optimizer.load_state_dict(ckpt['optimizer'])
        args.start_epoch = ckpt['epoch'] + 1
        best_acc = ckpt['best_acc'] if 'best_acc' in ckpt else ckpt['acc']

    if args.eval:
        ckpt_ = torch.load(args.eval, map_location=device)
        model.load_state_dict(ckpt_['model_state_dict'])
        evaluate(val_loader, model, names, device)
        return

    # train
    start_epoch = args.start_epoch
    total_epochs = hyp['total_epochs']
    try:
        print('[INFO] Starting training')
        start_time = time.time()
        for epoch in range(start_epoch, total_epochs):
            epoch_info = f'Epoch {epoch}/{total_epochs-1}'
            print(epoch_info)
            print('-' * len(epoch_info))

            # train engine
            train_acc, train_loss = train_one_epoch(train_loader, model,
                                                    criterion, optimizer,
                                                    epoch, device)
            val_acc, val_loss = validate(val_loader, model, criterion, device)
            # scheduler.step()

            # logging to tensorboard
            writter.add_scalar('Loss/train', train_loss, epoch)
            writter.add_scalar('Loss/val', val_loss, epoch)
            writter.add_scalar('Acc/train', train_acc, epoch)
            writter.add_scalar('Acc/val', val_acc, epoch)

            # print training info
            info = (f'loss {train_loss:.3f} accuracy {train_acc:.1f}% '
                    f'val_loss {val_loss:.3f} val_accuracy {val_acc:.1f}%\n')
            print(info)
            is_best = val_acc > best_acc
            if is_best:
                best_acc = val_acc
                print('Found new best val_acc: {:6.2f}!\n'.format(best_acc))

            # save checkpoint each 10 epochs
            checkpoint = {
                'epoch': epoch,
                'acc': val_acc,
                'model': model,
                'model_state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict()
            }
            filepath = str(
                Path(args.output_dir).joinpath(
                    f'{model_name}_{config_stem}.pt'))
            save_checkpoint(checkpoint, filepath, epoch, is_best)
    except KeyboardInterrupt:
        print('[INFO] Training interrupted. Saving checkpoint')
        print('[INFO] Best val_acc: {:.2f}'.format(best_acc))
        filepath = str(
            Path(args.output_dir).joinpath(
                f'{model_name}_{config_stem}_{epoch-1}.pt'))
        save_checkpoint(checkpoint, filepath, epoch, force_save=True)
        writter.flush()
        writter.close()
        sys.exit(0)

    # flush and close tensorboard writter
    writter.flush()
    writter.close()

    elapsed_time = time.time() - start_time
    elapsed_str = str(datetime.timedelta(seconds=int(elapsed_time)))
    print('[INFO] Training complete in: {}'.format(elapsed_str))
    print('[INFO] Best val_acc: {:.2f}'.format(best_acc))
    filepath = str(
        Path(args.output_dir).joinpath(f'{model_name}_{config_stem}_final.pt'))
    save_checkpoint(checkpoint, filepath, epoch, force_save=True)
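
The save_checkpoint helper called above is not shown. A hypothetical sketch that is consistent with the two call sites and the "each 10 epochs" comment (the project's real helper may differ, for example in how it names the best-model copy):

from pathlib import Path
import torch

def save_checkpoint(state, filepath, epoch, is_best=False, force_save=False):
    # write the checkpoint when it is the best so far, every 10 epochs, or when forced
    if is_best or force_save or epoch % 10 == 0:
        torch.save(state, filepath)
    if is_best:
        # keep a separate copy of the best model
        best_path = Path(filepath).with_name(Path(filepath).stem + '_best.pt')
        torch.save(state, str(best_path))
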
Example #7
                                weight_decay=0.0005)

    # and a learning rate scheduler which decreases the learning rate by
    # 10x every 15 epochs
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                   step_size=15,
                                                   gamma=0.1)

    # TRAINING LOOP

    save_fr = 1
    print_freq = 25  # make sure that print_freq is smaller than len(dataset) & len(dataset_test)
    os.makedirs('./maskrcnn_saved_models', exist_ok=True)

    for epoch in range(num_epochs):
        # train for one epoch, printing every print_freq iterations
        train_one_epoch(model,
                        optimizer,
                        data_loader,
                        device,
                        epoch,
                        print_freq=print_freq)
        if epoch % save_fr == 0:
            torch.save(
                model.state_dict(),
                './maskrcnn_saved_models/mask_rcnn_model_epoch_{}.pt'.format(epoch))
        # update the learning rate
        lr_scheduler.step()
        # evaluate on the test dataset
        evaluate(model, data_loader_test, device=device)