Example #1
def train(args):
    ckp = None
    if os.path.exists(args.log_dir + '/checkpoints/best.pth'):
        ckp = args.log_dir + '/checkpoints/best.pth'
    model = create_model(args.encoder_type, ckp=ckp).cuda()
    loaders = get_train_val_loaders(args.encoder_type,
                                    batch_size=args.batch_size,
                                    ifold=args.ifold)

    # model, criterion, optimizer
    if args.encoder_type.startswith('myunet'):
        optimizer = RAdam(model.parameters(), lr=args.lr)
    else:
        base_optim = RAdam([
            {
                'params': model.decoder.parameters(),
                'lr': args.lr
            },
            {
                'params': model.encoder.parameters(),
                'lr': args.lr / 10.
            },
        ])
        #base_optim = RAdam(model.parameters(),lr = 0.001)
        optimizer = Lookahead(base_optim, k=5, alpha=0.5)
    #scheduler = ReduceLROnPlateau(optimizer, factor=0.5, patience=2)

    if args.lrs == 'plateau':
        scheduler = ReduceLROnPlateau(optimizer,
                                      factor=args.factor,
                                      patience=args.patience,
                                      min_lr=args.min_lr)
    else:
        scheduler = CosineAnnealingLR(optimizer,
                                      args.t_max,
                                      eta_min=args.min_lr)

    criterion = smp.utils.losses.BCEDiceLoss(eps=1.)
    runner = SupervisedRunner()

    callbacks = [
        DiceCallback(),
        EarlyStoppingCallback(patience=15, min_delta=0.001),
    ]
    #if os.path.exists(args.log_dir + '/checkpoints/best_full.pth'):
    #    callbacks.append(CheckpointCallback(resume=args.log_dir + '/checkpoints/best_full.pth'))

    runner.train(model=model,
                 criterion=criterion,
                 optimizer=optimizer,
                 scheduler=scheduler,
                 loaders=loaders,
                 callbacks=callbacks,
                 logdir=args.log_dir,
                 num_epochs=args.num_epochs,
                 verbose=True)
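These `train(args)` functions are driven by an argparse-style namespace. Below is a minimal sketch of a parser covering the fields this example reads; the flag names match the attributes used above, but the defaults are illustrative assumptions, not the project's values.

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--encoder_type', default='resnet34')   # forwarded to create_model / get_train_val_loaders
parser.add_argument('--log_dir', default='runs/cloud')      # checkpoints/best.pth is looked up under this dir
parser.add_argument('--batch_size', type=int, default=16)
parser.add_argument('--ifold', type=int, default=0)
parser.add_argument('--lr', type=float, default=1e-3)
parser.add_argument('--lrs', default='plateau')              # 'plateau' or cosine annealing
parser.add_argument('--factor', type=float, default=0.5)
parser.add_argument('--patience', type=int, default=2)
parser.add_argument('--min_lr', type=float, default=1e-6)
parser.add_argument('--t_max', type=int, default=10)
parser.add_argument('--num_epochs', type=int, default=40)
args = parser.parse_args()

train(args)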
Example #2
def find_class_params(args, models):
    val_loader = get_train_val_loaders(args.encoder_types.split(',')[0], batch_size=args.batch_size)['valid']
    probs, masks = predict_loader(models, val_loader)
    print(probs.shape, masks.shape)

    valid_masks = []
    probabilities = np.zeros((2220, 350, 525))
    for i, (img_probs, img_masks) in enumerate(zip(probs, masks)):
        for m in img_masks:
            if m.shape != (350, 525):
                m = cv2.resize(m, dsize=(525, 350), interpolation=cv2.INTER_LINEAR)
            valid_masks.append(m)

        for j, probability in enumerate(img_probs):
            if probability.shape != (350, 525):
                probability = cv2.resize(probability, dsize=(525, 350), interpolation=cv2.INTER_LINEAR)
            probabilities[i * 4 + j, :, :] = probability

    print(len(valid_masks), len(probabilities), valid_masks[0].shape, probabilities[0].shape)
    class_params = {}
    for class_id in range(4):
        print(class_id)
        attempts = []
        for t in range(30, 90, 5):
            t /= 100
            #for ms in [0, 100, 1200, 5000, 10000]:
            for ms in [5000, 10000, 15000, 20000, 22500, 25000]:
                masks = []
                for i in range(class_id, len(probabilities), 4):
                    probability = probabilities[i]
                    predict, num_predict = post_process(probability, t, ms)
                    masks.append(predict)

                d = []
                for i, j in zip(masks, valid_masks[class_id::4]):
                    if (i.sum() == 0) & (j.sum() == 0):
                        d.append(1)
                    else:
                        d.append(dice(i, j))

                attempts.append((t, ms, np.mean(d)))

        attempts_df = pd.DataFrame(attempts, columns=['threshold', 'size', 'dice'])


        attempts_df = attempts_df.sort_values('dice', ascending=False)
        print(attempts_df.head())
        best_threshold = attempts_df['threshold'].values[0]
        best_size = attempts_df['size'].values[0]
        
        class_params[class_id] = (best_threshold, best_size)
    print(class_params)
    return class_params
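`post_process` and `dice` are not defined in this snippet. The sketch below is consistent with how they are called here (probability map, threshold, minimum connected-component size; Dice coefficient between two binary masks) and follows the common cloud-segmentation kernel implementation; treat it as an assumption rather than the project's exact code.

import cv2
import numpy as np

def post_process(probability, threshold, min_size):
    # Binarize the probability map, then keep only connected components
    # whose pixel count is at least min_size.
    mask = (probability > threshold).astype(np.uint8)
    num_component, component = cv2.connectedComponents(mask)
    predictions = np.zeros(probability.shape, np.float32)
    num = 0
    for c in range(1, num_component):
        p = (component == c)
        if p.sum() >= min_size:
            predictions[p] = 1
            num += 1
    return predictions, num

def dice(im1, im2, eps=1e-7):
    # Dice coefficient between two binary masks.
    im1 = np.asarray(im1).astype(bool)
    im2 = np.asarray(im2).astype(bool)
    return 2.0 * (im1 & im2).sum() / (im1.sum() + im2.sum() + eps)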
Example #3
def validate_tta(args):
    model, _ = create_model(args)
    if torch.cuda.device_count() > 1:
        model = DataParallel(model)
    model = model.cuda()
    model.eval()

    val_loaders = []

    for i in range(args.tta_num):
        _, val_loader = get_train_val_loaders(
            num_classes=args.num_classes,
            start_index=args.start_index,
            batch_size=args.batch_size,
            val_batch_size=args.val_batch_size,
            val_num=args.val_num,
            other=args.other,
            tta_index=i)
        val_loaders.append(val_loader)

    with torch.no_grad():
        outputs = []
        for tta_data in zip(*val_loaders):
            #print('data:', len(tta_data), len(tta_data[0]), tta_data[0][0].size())
            #print()
            #break
            batch_output = []
            #targets = None
            for img, target in tta_data:
                img = img.cuda()
                output = model(img, None, True)
                batch_output.append(output.detach().cpu())
                #targets = target
            outputs.append(torch.stack(batch_output).mean(0))
        outputs = torch.cat(outputs, 0)
        pred = F.softmax(outputs, dim=1)
        #score, pred = outputs.max(1)
        top1, top10 = accuracy(pred, torch.tensor(val_loaders[0].labels))

        top1, top10 = top1 / val_loaders[0].num, top10 / val_loaders[0].num
        print('top1:', top1)
        print('top10:', top10)
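`accuracy` appears to return raw counts of correct top-1 and top-10 predictions, since the caller divides by `val_loaders[0].num` afterwards. A sketch under that assumption (labels are taken to be an integer class-index tensor):

import torch

def accuracy(pred, labels, topk=(1, 10)):
    # Count how many samples have the true label within the top-k scores.
    # Returns raw counts; the caller divides by the dataset size.
    maxk = max(topk)
    _, top = pred.topk(maxk, dim=1)            # (N, maxk) predicted class indices
    correct = top.eq(labels.view(-1, 1))       # (N, maxk) boolean hits
    return [correct[:, :k].any(dim=1).sum().item() for k in topk]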
Example #4
def get_val_result(batch_size=16, ckp=None):
    model = UNetShipV1(34)
    model_file = os.path.join(settings.MODEL_DIR, model.name, 'best.pth')
    if ckp is None:
        ckp = model_file
    model.load_state_dict(torch.load(ckp))
    model = model.cuda()
    model.eval()

    _, val_loader = get_train_val_loaders(batch_size=batch_size, drop_empty=True)
    outputs = []
    with torch.no_grad():
        for img, target, ship_target in val_loader:
            img, target, ship_target = img.cuda(), target.cuda(), ship_target.cuda()
            output, _ = model(img)
            #print(output.size(), salt_out.size())
            output = torch.sigmoid(output)
            
            for o in output.cpu():
                outputs.append(o.squeeze().numpy())
    return outputs, val_loader.y_true
Example #5
def predict(args):
    model, model_file = create_prediction_model(args)

    if args.th > 0:
        fix_th = args.th
    else:
        if args.tuning_val:
            val_loader = get_tuning_loader(args, batch_size=args.batch_size)
        else:
            _, val_loader = get_train_val_loaders(args,
                                                  batch_size=args.batch_size,
                                                  val_num=20000)

        fix_th, fix_score = find_best_thresholds(args, model, val_loader)
        print('fixed th:', fix_th)
        print('fixed score:', fix_score)

    print('using threshold: {}'.format(fix_th))

    if args.val:
        return

    outputs = model_predict(args,
                            model,
                            model_file,
                            args.check,
                            tta_num=args.tta_num)

    classes, _ = get_classes(args.cls_type, args.start_index, args.end_index)

    label_names = []
    pred = (outputs > fix_th).astype(np.uint8)
    for row in pred:
        label_names.append(get_label_names(row, classes))

    if args.check:
        print(label_names)
        return

    create_submission(args, label_names, args.sub_file)
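`get_label_names` maps a thresholded prediction row to the names of the positive classes. A plausible one-liner consistent with the call above; whether it returns a joined string or a list depends on `create_submission`, which is not shown, so this is an assumption.

def get_label_names(row, classes):
    # Collect the class names whose prediction bit is set in this row.
    return ' '.join(classes[i] for i, v in enumerate(row) if v == 1)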
Example #6
def train(args):
    print('start training...')
    model, model_file = create_model(args)
    #model = model.cuda()
    if torch.cuda.device_count() > 1:
        model_name = model.name
        model = DataParallel(model)
        model.name = model_name
    model = model.cuda()

    if args.optim == 'Adam':
        optimizer = optim.Adam(model.parameters(),
                               lr=args.lr,
                               weight_decay=0.0001)
    else:
        optimizer = optim.SGD(model.parameters(),
                              lr=args.lr,
                              momentum=0.9,
                              weight_decay=0.0001)

    if args.lrs == 'plateau':
        lr_scheduler = ReduceLROnPlateau(optimizer,
                                         mode='max',
                                         factor=args.factor,
                                         patience=args.patience,
                                         min_lr=args.min_lr)
    else:
        lr_scheduler = CosineAnnealingLR(optimizer,
                                         args.t_max,
                                         eta_min=args.min_lr)
    #ExponentialLR(optimizer, 0.9, last_epoch=-1) #CosineAnnealingLR(optimizer, 15, 1e-7)

    _, val_loader = get_train_val_loaders(batch_size=args.batch_size,
                                          val_num=args.val_num)

    best_top1_acc = 0.

    print(
        'epoch |    lr    |      %        |  loss  |  avg   |  loss  |  top1  | top10  |  best  | time |  save |'
    )

    if not args.no_first_val:
        top10_acc, best_top1_acc, total_loss = validate(
            args, model, val_loader)
        print(
            'val   |          |               |        |        | {:.4f} | {:.4f} | {:.4f} | {:.4f} |      |       |'
            .format(total_loss, best_top1_acc, top10_acc, best_top1_acc))

    if args.val:
        return

    model.train()

    if args.lrs == 'plateau':
        lr_scheduler.step(best_top1_acc)
    else:
        lr_scheduler.step()
    train_iter = 0

    for epoch in range(args.start_epoch, args.epochs):
        train_loader, val_loader = get_train_val_loaders(
            batch_size=args.batch_size,
            dev_mode=args.dev_mode,
            val_num=args.val_num)

        train_loss = 0

        current_lr = get_lrs(
            optimizer)  #optimizer.state_dict()['param_groups'][2]['lr']
        bg = time.time()
        for batch_idx, data in enumerate(train_loader):
            train_iter += 1
            img, target = data
            img, target = img.cuda(), target.cuda()
            optimizer.zero_grad()
            output = model(img)

            loss = criterion(args, output, target)
            loss.backward()
            optimizer.step()

            train_loss += loss.item()
            print('\r {:4d} | {:.6f} | {:06d}/{} | {:.4f} | {:.4f} |'.format(
                epoch, float(current_lr[0]), args.batch_size * (batch_idx + 1),
                train_loader.num, loss.item(), train_loss / (batch_idx + 1)),
                  end='')

            if train_iter > 0 and train_iter % args.iter_val == 0:
                top10_acc, top1_acc, total_loss = validate(
                    args, model, val_loader)

                _save_ckp = ''
                if args.always_save or top1_acc > best_top1_acc:
                    best_top1_acc = top1_acc
                    if isinstance(model, DataParallel):
                        torch.save(model.module.state_dict(), model_file)
                    else:
                        torch.save(model.state_dict(), model_file)
                    _save_ckp = '*'
                print(' {:.4f} | {:.4f} | {:.4f} | {:.4f} | {:.2f} |  {:4s} |'.
                      format(total_loss, top1_acc, top10_acc, best_top1_acc,
                             (time.time() - bg) / 60, _save_ckp))

                model.train()

                if args.lrs == 'plateau':
                    lr_scheduler.step(top1_acc)
                else:
                    lr_scheduler.step()
                current_lr = get_lrs(optimizer)
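`get_lrs` only needs to expose the current learning rate of each parameter group (the progress line prints `current_lr[0]`). A likely implementation, given how it is used; the project's version may differ:

def get_lrs(optimizer):
    # Current learning rate of every parameter group, in order.
    return [group['lr'] for group in optimizer.param_groups]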
Example #7
def train(args):
    model, model_file = create_model(args.encoder_type,
                                     work_dir=args.work_dir,
                                     ckp=args.ckp)
    model = model.cuda()

    loaders = get_train_val_loaders(batch_size=args.batch_size)

    #optimizer = RAdam([
    #    {'params': model.decoder.parameters(), 'lr': args.lr},
    #    {'params': model.encoder.parameters(), 'lr': args.lr / 10.},
    #])
    if args.optim_name == 'RAdam':
        optimizer = RAdam(model.parameters(), lr=args.lr)
    elif args.optim_name == 'Adam':
        optimizer = optim.Adam(model.parameters(), lr=args.lr)
    elif args.optim_name == 'SGD':
        optimizer = optim.SGD(model.parameters(), momentum=0.9, lr=args.lr)

    #model, optimizer = amp.initialize(model, optimizer, opt_level="O1",verbosity=0)

    if torch.cuda.device_count() > 1:
        model = DataParallel(model)

    if args.lrs == 'plateau':
        lr_scheduler = ReduceLROnPlateau(optimizer,
                                         mode='max',
                                         factor=args.factor,
                                         patience=args.patience,
                                         min_lr=args.min_lr)
    else:
        lr_scheduler = CosineAnnealingLR(optimizer,
                                         args.t_max,
                                         eta_min=args.min_lr)

    best_metrics = 0.
    best_key = 'dice'

    print(
        'epoch |    lr    |      %        |  loss  |  avg   |   loss |  dice  |  best  | time |  save |'
    )

    if not args.no_first_val:
        val_metrics = validate(args, model, loaders['valid'])
        print(
            'val   |          |               |        |        | {:.4f} | {:.4f} | {:.4f} |        |        |'
            .format(val_metrics['loss'], val_metrics['dice'],
                    val_metrics['dice']))

        best_metrics = val_metrics[best_key]

    if args.val:
        return

    model.train()

    #if args.lrs == 'plateau':
    #    lr_scheduler.step(best_metrics)
    #else:
    #    lr_scheduler.step()
    train_iter = 0

    for epoch in range(args.num_epochs):
        train_loss = 0

        current_lr = get_lrs(optimizer)
        bg = time.time()
        for batch_idx, data in enumerate(loaders['train']):
            train_iter += 1
            img, targets = data[0].cuda(), data[1].cuda()
            batch_size = img.size(0)

            outputs = model(img)
            loss = _reduce_loss(criterion(outputs, targets))
            loss.backward()

            #with amp.scale_loss(loss*batch_size, optimizer) as scaled_loss:
            #    scaled_loss.backward()

            if batch_idx % 4 == 0:
                optimizer.step()
                optimizer.zero_grad()

            train_loss += loss.item()
            print('\r {:4d} | {:.6f} | {:06d}/{} | {:.4f} | {:.4f} |'.format(
                epoch, float(current_lr[0]),
                args.batch_size * (batch_idx + 1), loaders['train'].num,
                loss.item(), train_loss / (batch_idx + 1)),
                  end='')

            if train_iter > 0 and train_iter % args.iter_val == 0:
                save_model(model, model_file + '_latest')
                val_metrics = validate(args, model, loaders['valid'])

                _save_ckp = ''
                if val_metrics[best_key] > best_metrics:
                    best_metrics = val_metrics[best_key]
                    save_model(model, model_file)
                    _save_ckp = '*'
                print(' {:.4f} | {:.4f} | {:.4f} | {:.2f} |  {:4s} |'.format(
                    val_metrics['loss'], val_metrics['dice'], best_metrics,
                    (time.time() - bg) / 60, _save_ckp))

                model.train()

                if args.lrs == 'plateau':
                    lr_scheduler.step(best_metrics)
                else:
                    lr_scheduler.step()
                current_lr = get_lrs(optimizer)
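This variant accumulates gradients over 4 batches before each `optimizer.step()`. The helpers `_reduce_loss` and `save_model` are not shown; the sketches below are consistent with their usage here (scalar reduction of an unreduced loss, DataParallel-aware checkpointing as in Example #6) and should be read as assumptions.

import torch
from torch.nn import DataParallel

def _reduce_loss(loss):
    # Collapse a per-element loss tensor to a scalar for backward().
    return loss.mean()

def save_model(model, model_file):
    # Save the wrapped module's weights when running under DataParallel.
    state = model.module.state_dict() if isinstance(model, DataParallel) else model.state_dict()
    torch.save(state, model_file)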
Example #8
def train(args):
    print('start training...')

    model = create_model()
    model_file = os.path.join(MODEL_DIR, model.name, 'best.pth')

    parent_dir = os.path.dirname(model_file)
    if not os.path.exists(parent_dir):
        os.makedirs(parent_dir)

    if args.init_ckp is not None:
        CKP = args.init_ckp
    else:
        CKP = model_file
    if os.path.exists(CKP):
        print('loading {}...'.format(CKP))
        model.load_state_dict(torch.load(CKP))
    model = model.cuda()

    if args.optim == 'Adam':
        optimizer = optim.Adam(model.parameters(),
                               lr=args.lr,
                               weight_decay=0.001)
    else:
        optimizer = optim.SGD(model.parameters(),
                              lr=args.lr,
                              momentum=0.9,
                              weight_decay=0.0001)

    train_loader, val_loader = get_train_val_loaders(
        batch_size=args.batch_size,
        dev_mode=args.dev_mode,
        drop_empty=False,
        img_sz=args.img_sz)

    if args.lrs == 'plateau':
        lr_scheduler = ReduceLROnPlateau(optimizer,
                                         mode='max',
                                         factor=args.factor,
                                         patience=args.patience,
                                         min_lr=args.min_lr)
    else:
        lr_scheduler = CosineAnnealingLR(optimizer,
                                         args.t_max,
                                         eta_min=args.min_lr)
    #ExponentialLR(optimizer, 0.9, last_epoch=-1) #CosineAnnealingLR(optimizer, 15, 1e-7)

    print(
        'epoch |   lr    |   %        |  loss  |  avg   | f loss | lovaz  |  bce   |  cls   |  iou   | iout   |  best  | time | save |  ship  |'
    )

    best_iout, _iou, _f, _l, _b, _ship, best_cls_acc = validate(
        args, model, val_loader, args.start_epoch)
    print(
        'val   |         |            |        |        | {:.4f} | {:.4f} | {:.4f} | {:.4f} | {:.4f} | {:.4f} | {:.4f} |      |      | {:.4f} |'
        .format(_f, _l, _b, _ship, _iou, best_iout, best_cls_acc,
                best_cls_acc))
    if args.val:
        return

    model.train()

    if args.lrs == 'plateau':
        lr_scheduler.step(best_iout)
    else:
        lr_scheduler.step()
    train_iter = 0

    for epoch in range(args.start_epoch, args.epochs):
        train_loss = 0

        current_lr = get_lrs(
            optimizer)  #optimizer.state_dict()['param_groups'][2]['lr']
        bg = time.time()
        for batch_idx, data in enumerate(train_loader):
            train_iter += 1
            img, target, salt_target = data
            img, target, salt_target = img.cuda(), target.cuda(), salt_target.cuda()
            optimizer.zero_grad()
            salt_out = model(img)

            loss = F.binary_cross_entropy_with_logits(salt_out.squeeze(),
                                                      salt_target)
            loss.backward()

            optimizer.step()

            train_loss += loss.item()
            print('\r {:4d} | {:.5f} | {:4d}/{} | {:.4f} | {:.4f} |'.format(
                epoch, float(current_lr[0]), args.batch_size * (batch_idx + 1),
                train_loader.num, loss.item(), train_loss / (batch_idx + 1)),
                  end='')

            if train_iter > 0 and train_iter % args.iter_val == 0:
                iout, iou, focal_loss, lovaz_loss, bce_loss, cls_loss, cls_acc = validate(
                    args, model, val_loader, epoch=epoch)

                _save_ckp = ''
                if cls_acc > best_cls_acc:
                    best_cls_acc = cls_acc
                    torch.save(model.state_dict(), model_file)
                    _save_ckp = '*'
                # print('epoch |   lr    |   %       |  loss  |  avg   | f loss | lovaz  |  bce   |  cls   |  iou   | iout   |  best  | time | save |  ship  |')
                print(
                    ' {:.4f} | {:.4f} | {:.4f} | {:.4f} | {:.4f} | {:.4f} | {:.4f} | {:.2f} | {:4s} | {:.4f} |'
                    .format(focal_loss, lovaz_loss, bce_loss, cls_loss, iou,
                            iout, best_cls_acc, (time.time() - bg) / 60,
                            _save_ckp, cls_acc))

                #log.info('epoch {}: train loss: {:.4f} focal loss: {:.4f} lovaz loss: {:.4f} iout: {:.4f} best iout: {:.4f} iou: {:.4f} lr: {} {}'
                #    .format(epoch, train_loss, focal_loss, lovaz_loss, iout, best_iout, iou, current_lr, _save_ckp))

                model.train()

                if args.lrs == 'plateau':
                    lr_scheduler.step(cls_acc)
                else:
                    lr_scheduler.step()
                current_lr = get_lrs(optimizer)

    del model, train_loader, val_loader, optimizer, lr_scheduler
Example #9
device = 'cuda'

model = NeuralNet()
'''
if torch.cuda.device_count()>1:
    print("Let's use", torch.cuda.device_count(), "GPUs!")
    model = nn.DataParallel(model, device_ids=range(torch.cuda.device_count()))
'''
model.to(device)


learning_rate = 1e-3
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9)

train_loader, val_loader = get_train_val_loaders()
test_loader = get_test_loader()




epochs = int(1e3)

acc_train_list = []
acc_val_list = []
acc_test_list = []

start = time.time()
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    loss_train, accuracy_train = train_loop(
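Example #9 breaks off at the `train_loop(` call in the source. A hypothetical `train_loop` consistent with the surrounding setup (CrossEntropyLoss, SGD) and its `(loss, accuracy)` return values; the signature and body are assumptions, not the original code.

def train_loop(dataloader, model, loss_fn, optimizer, device='cuda'):
    # One pass over the training data; returns (mean loss, accuracy).
    model.train()
    total_loss, correct, seen = 0.0, 0, 0
    for X, y in dataloader:
        X, y = X.to(device), y.to(device)
        pred = model(X)
        loss = loss_fn(pred, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total_loss += loss.item() * X.size(0)
        correct += (pred.argmax(dim=1) == y).sum().item()
        seen += X.size(0)
    return total_loss / seen, correct / seen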
Example #10
def find_class_params(args):
    runner = SupervisedRunner()
    model = create_model(args.encoder_type)
    valid_loader = get_train_val_loaders(args.encoder_type,
                                         batch_size=args.batch_size)['valid']

    encoded_pixels = []
    loaders = {"infer": valid_loader}
    runner.infer(
        model=model,
        loaders=loaders,
        callbacks=[CheckpointCallback(resume=args.ckp),
                   InferCallback()],
    )
    print(runner.callbacks)
    valid_masks = []
    probabilities = np.zeros((2220, 350, 525))
    for i, (batch, output) in enumerate(
            tqdm(
                zip(valid_loader.dataset,
                    runner.callbacks[0].predictions["logits"]))):
        image, mask = batch
        for m in mask:
            if m.shape != (350, 525):
                m = cv2.resize(m,
                               dsize=(525, 350),
                               interpolation=cv2.INTER_LINEAR)
            valid_masks.append(m)

        for j, probability in enumerate(output):
            if probability.shape != (350, 525):
                probability = cv2.resize(probability,
                                         dsize=(525, 350),
                                         interpolation=cv2.INTER_LINEAR)
            probabilities[i * 4 + j, :, :] = probability

    class_params = {}
    for class_id in range(4):
        print(class_id)
        attempts = []
        for t in range(0, 100, 5):
            t /= 100
            #for ms in [0, 100, 1200, 5000, 10000]:
            for ms in [5000, 10000, 15000, 20000, 22500, 25000, 30000]:

                masks = []
                for i in range(class_id, len(probabilities), 4):
                    probability = probabilities[i]
                    predict, num_predict = post_process(
                        sigmoid(probability), t, ms)
                    masks.append(predict)

                d = []
                for i, j in zip(masks, valid_masks[class_id::4]):
                    if (i.sum() == 0) & (j.sum() == 0):
                        d.append(1)
                    else:
                        d.append(dice(i, j))

                attempts.append((t, ms, np.mean(d)))

        attempts_df = pd.DataFrame(attempts,
                                   columns=['threshold', 'size', 'dice'])

        attempts_df = attempts_df.sort_values('dice', ascending=False)
        print(attempts_df.head())
        best_threshold = attempts_df['threshold'].values[0]
        best_size = attempts_df['size'].values[0]

        class_params[class_id] = (best_threshold, best_size)
    print(class_params)
    return class_params, runner
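Unlike Example #2, the raw logits here are passed through `sigmoid` before post-processing. A standard NumPy version, assuming that is all the helper does:

import numpy as np

def sigmoid(x):
    # Map raw logits to (0, 1) probabilities before thresholding.
    return 1.0 / (1.0 + np.exp(-x))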
Example #11
File: train.py  Project: chicm/yt8m
def train(args):
    print('start training...')
    model, model_file = create_model(args)
    train_loader, val_loader = get_train_val_loaders(
        batch_size=args.batch_size, val_batch_size=args.val_batch_size)
    train_loader = get_frame_train_loader(batch_size=args.batch_size)
    #model, optimizer = amp.initialize(model, optimizer, opt_level="O1",verbosity=0)

    if args.optim == 'Adam':
        optimizer = optim.Adam(model.parameters(),
                               lr=args.lr,
                               weight_decay=0.0001)
    elif args.optim == 'RAdam':
        optimizer = RAdam(model.parameters(), lr=args.lr, weight_decay=0.0001)
    else:
        optimizer = optim.SGD(model.parameters(),
                              lr=args.lr,
                              momentum=0.9,
                              weight_decay=0.0001)

    if args.lrs == 'plateau':
        lr_scheduler = ReduceLROnPlateau(optimizer,
                                         mode='min',
                                         factor=args.factor,
                                         patience=args.patience,
                                         min_lr=args.min_lr)
    else:
        lr_scheduler = CosineAnnealingLR(optimizer,
                                         args.t_max,
                                         eta_min=args.min_lr)

    model = model.cuda()
    if torch.cuda.device_count() > 1:
        model_name = model.name
        model = DataParallel(model)
        model.name = model_name

    #model=model.train()

    best_f2 = 99999.
    best_key = 'loss'

    print(
        'epoch |    lr     |       %        |  loss  |  avg   |  loss  |  0.01  |  0.20  |  0.50  |  best  | time |  save |'
    )

    if not args.no_first_val:
        val_metrics = validate(args, model, val_loader)
        print(
            'val   |           |                |        |        | {:.4f} | {:.4f} | {:.4f} | {:.4f} | {:.4f} |       |        |'
            .format(val_metrics['loss'], val_metrics['f2_th_0.01'],
                    val_metrics['f2_th_0.20'], val_metrics['f2_th_0.50'],
                    val_metrics[best_key]))

        best_f2 = val_metrics[best_key]

    if args.val:
        return

    model.train()

    if args.lrs == 'plateau':
        lr_scheduler.step(best_f2)
    else:
        lr_scheduler.step()

    train_iter = 0

    for epoch in range(args.start_epoch, args.num_epochs):
        #train_loader, val_loader = get_train_val_loaders(batch_size=args.batch_size, val_batch_size=args.val_batch_size, val_num=args.val_num)

        train_loss = 0

        current_lr = get_lrs(optimizer)
        bg = time.time()
        for batch_idx, data in enumerate(train_loader):
            train_iter += 1
            if train_loader.seg:
                rgb, audio, labels = [x.cuda() for x in data]
            else:
                rgb, audio, labels = data[0].cuda(), data[2].cuda(), data[4].cuda()

            output = model(rgb, audio)

            loss = criterion(output, labels)
            batch_size = rgb.size(0)

            loss.backward()
            optimizer.step()
            optimizer.zero_grad()

            #with amp.scale_loss(loss, optimizer) as scaled_loss:
            #    scaled_loss.backward()

            train_loss += loss.item()
            print('\r {:4d} | {:.7f} | {:06d}/{} | {:.4f} | {:.4f} |'.format(
                epoch, float(current_lr[0]), args.batch_size * (batch_idx + 1),
                train_loader.num, loss.item(), train_loss / (batch_idx + 1)),
                  end='')

            if train_iter > 0 and train_iter % args.iter_val == 0:
                if isinstance(model, DataParallel):
                    torch.save(model.module.state_dict(),
                               model_file + '_latest')
                else:
                    torch.save(model.state_dict(), model_file + '_latest')

                val_metrics = validate(args, model, val_loader)

                _save_ckp = ''
                if args.always_save or val_metrics[best_key] < best_f2:
                    best_f2 = val_metrics[best_key]
                    if isinstance(model, DataParallel):
                        torch.save(model.module.state_dict(), model_file)
                    else:
                        torch.save(model.state_dict(), model_file)
                    _save_ckp = '*'
                print(
                    ' {:.4f} | {:.4f} | {:.4f} | {:.4f} | {:.4f} | {:.2f} |  {:4s} |'
                    .format(val_metrics['loss'], val_metrics['f2_th_0.01'],
                            val_metrics['f2_th_0.20'],
                            val_metrics['f2_th_0.50'], best_f2,
                            (time.time() - bg) / 60, _save_ckp))

                model.train()
                if args.lrs == 'plateau':
                    lr_scheduler.step(best_f2)
                else:
                    lr_scheduler.step()
                current_lr = get_lrs(optimizer)
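`criterion` is defined outside this snippet. Given the multi-label YouTube-8M setting and the F2-at-threshold metrics reported above, a plausible stand-in is binary cross-entropy over logits; this is an assumption about the project, not its confirmed loss.

import torch.nn as nn

# Assumed: labels are float multi-hot vectors with the same shape as the model output.
criterion = nn.BCEWithLogitsLoss()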
Example #12
def train(args):
    print('start training...')
    model, model_file = create_model(args)
    train_loader, val_loader = get_train_val_loaders(batch_size=args.train_batch_size, val_batch_size=args.val_batch_size)
    frame_loader, _ = get_frame_train_loader(batch_size=args.frame_batch_size)
    #model, optimizer = amp.initialize(model, optimizer, opt_level="O1",verbosity=0)

    if args.optim == 'Adam':
        optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=0.0001)
    elif args.optim == 'RAdam':
        optimizer = RAdam(model.parameters(), lr=args.lr, weight_decay=0.0001)
    else:
        optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=0.9, weight_decay=0.0001)

    if args.lrs == 'plateau':
        lr_scheduler = ReduceLROnPlateau(optimizer, mode='max', factor=args.factor, patience=args.patience, min_lr=args.min_lr)
    else:
        lr_scheduler = CosineAnnealingLR(optimizer, args.t_max, eta_min=args.min_lr)

    model = model.cuda()
    if torch.cuda.device_count() > 1:
        model_name = model.name
        model = DataParallel(model)
        model.name = model_name

    #model=model.train()

    best_f2 = 0.
    best_key = 'top1'

    print('epoch |    lr     |       %        |  loss  |  avg   |  loss  |  top1   |  top10  |  best  | time |  save |')

    if not args.no_first_val:
        val_metrics = validate(args, model, val_loader)
        print('val   |           |                |        |        | {:.4f} | {:.4f} | {:.4f} | {:.4f} |       |        |'.format(
            val_metrics['valid_loss'], val_metrics['top1'], val_metrics['top10'], val_metrics[best_key] ))

        best_f2 = val_metrics[best_key]

    if args.val:
        return

    model.train()

    if args.lrs == 'plateau':
        lr_scheduler.step(best_f2)
    else:
        lr_scheduler.step()


    #for epoch in range(args.start_epoch, args.num_epochs):
    def get_batch(loader, iterator=None, epoch=0, batch_idx=0):
        # Fetch the next batch; when the loader is exhausted, restart it,
        # advance the epoch counter and reset the batch index.
        ret_epoch = epoch
        ret_batch_idx = batch_idx + 1
        if iterator is None:
            iterator = loader.__iter__()
        try:
            b = iterator.__next__()
        except StopIteration:
            iterator = loader.__iter__()
            b = iterator.__next__()
            ret_epoch += 1
            ret_batch_idx = 0
        return b, iterator, ret_epoch, ret_batch_idx

    frame_epoch = args.start_epoch
    train_epoch = 0
    frame_iter = frame_loader.__iter__()
    train_iter = train_loader.__iter__()
    train_step = 0
    frame_batch_idx = -1
    train_batch_idx = -1


    while frame_epoch <= args.num_epochs:
        frame_loss = 0.
        train_loss = 0.
        current_lr = get_lrs(optimizer)
        bg = time.time()

        def train_batch(rgb, audio, labels):
            output = model(rgb, audio)
            
            loss = criterion(output, labels)
            batch_size = rgb.size(0)

            loss.backward()
            optimizer.step()
            optimizer.zero_grad()

            return loss.item()


        for i in range(200):
            batch, frame_iter, frame_epoch, frame_batch_idx = get_batch(frame_loader, frame_iter, frame_epoch, frame_batch_idx)
            rgb, audio, labels = batch[0].cuda(), batch[2].cuda(), batch[4].cuda()
            
            loss_val = train_batch(rgb, audio, labels)
            frame_loss += loss_val
            print('\r F{:4d} | {:.7f} | {:06d}/{} | {:.4f} | {:.4f} |'.format(
                frame_epoch, float(current_lr[0]), args.frame_batch_size*(frame_batch_idx+1), frame_loader.num, loss_val, frame_loss/(i+1)), end='')
        print('')
        for i in range(100):
            batch, train_iter, train_epoch, train_batch_idx = get_batch(train_loader, train_iter, train_epoch, train_batch_idx)
            rgb, audio, labels = [x.cuda() for x in batch]
            
            loss_val = train_batch(rgb, audio, labels)
            train_loss += loss_val
            print('\r T{:4d} | {:.7f} | {:06d}/{} | {:.4f} | {:.4f} |'.format(
                train_epoch, float(current_lr[0]), args.train_batch_size*(train_batch_idx+1), train_loader.num, loss_val, train_loss/(i+1)), end='')


        if train_step > 0 and train_step % args.iter_val == 0:
            if isinstance(model, DataParallel):
                torch.save(model.module.state_dict(), model_file+'_latest')
            else:
                torch.save(model.state_dict(), model_file+'_latest')

            val_metrics = validate(args, model, val_loader)
            
            _save_ckp = ''
            if args.always_save or val_metrics[best_key] > best_f2:
                best_f2 = val_metrics[best_key]
                if isinstance(model, DataParallel):
                    torch.save(model.module.state_dict(), model_file)
                else:
                    torch.save(model.state_dict(), model_file)
                _save_ckp = '*'
            print(' {:.4f} | {:.4f} | {:.4f} | {:.4f} | {:.2f} |  {:4s} |'.format(
                val_metrics['valid_loss'], val_metrics['top1'], val_metrics['top10'], best_f2,
                (time.time() - bg) / 60, _save_ckp))

            model.train()
            if args.lrs == 'plateau':
                lr_scheduler.step(best_f2)
            else:
                lr_scheduler.step()
            current_lr = get_lrs(optimizer)
    
        train_step += 1
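The manual `get_batch` bookkeeping above interleaves fixed-size chunks of two loaders of different lengths. A simpler generator achieves the same cycling, at the cost of not tracking epoch and batch indices; this is an alternative sketch, not the project's code.

def cycle(loader):
    # Re-iterate a DataLoader forever so it can be consumed in fixed-size chunks.
    while True:
        for batch in loader:
            yield batch

frame_batches = cycle(frame_loader)
train_batches = cycle(train_loader)
# e.g. rgb, audio, labels = [x.cuda() for x in next(train_batches)]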
Example #13
def train(args):
    print('start training...')
    model, model_file = create_model(args)
    #model = model.cuda()

    # Prepare optimizer
    param_optimizer = list(model.named_parameters())
    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
    optimizer_grouped_parameters = [
        {'params': [p for n, p in param_optimizer
                    if not any(nd in n for nd in no_decay)],
         'weight_decay': 0.01},
        {'params': [p for n, p in param_optimizer
                    if any(nd in n for nd in no_decay)],
         'weight_decay': 0.0},
    ]

    train_loader, val_loader = get_train_val_loaders(
        batch_size=args.batch_size,
        val_batch_size=args.val_batch_size,
        val_num=args.val_num)
    num_train_optimization_steps = args.num_epochs * train_loader.num // train_loader.batch_size

    if args.optim_name == 'BertAdam':
        optimizer = BertAdam(optimizer_grouped_parameters,
                             lr=args.lr,
                             warmup=args.warmup,
                             t_total=num_train_optimization_steps)
    else:
        if args.optim_name == 'Adam':
            optimizer = optim.Adam(optimizer_grouped_parameters, lr=args.lr)
            lr_scheduler = ReduceLROnPlateau(optimizer,
                                             mode='max',
                                             factor=0.6,
                                             patience=4,
                                             min_lr=1e-6)
        elif args.optim_name == 'SGD':
            optimizer = optim.SGD(optimizer_grouped_parameters,
                                  lr=args.lr,
                                  momentum=0.9)  #, weight_decay=1e-4)
            lr_scheduler = ReduceLROnPlateau(optimizer,
                                             mode='max',
                                             factor=0.6,
                                             patience=4,
                                             min_lr=1e-6)
        else:
            raise AssertionError('wrong optimizer name')

        if args.lrs == 'plateau':
            lr_scheduler = ReduceLROnPlateau(optimizer,
                                             mode='max',
                                             factor=args.factor,
                                             patience=args.patience,
                                             min_lr=args.min_lr)
        else:
            lr_scheduler = CosineAnnealingLR(optimizer,
                                             args.t_max,
                                             eta_min=args.min_lr)
        #ExponentialLR(optimizer, 0.9, last_epoch=-1) #CosineAnnealingLR(optimizer, 15, 1e-7)

    #_, val_loader = get_train_val_loaders(batch_size=args.batch_size, val_batch_size=args.val_batch_size, val_num=args.val_num)

    #model.module.freeze()

    best_f2 = 999.
    best_key = 'roc'

    print(
        'epoch |    lr     |       %        |  loss  |  avg   |  loss  |  acc   |  prec  | recall |   roc  |  best  | time |  save |'
    )

    if not args.no_first_val:
        val_metrics = validate(args, model, val_loader)
        print(
            'val   |           |                |        |        | {:.4f} | {:.4f} | {:.4f} | {:.4f} | {:.4f} | {:.4f} |       |        |'
            .format(val_metrics['valid_loss'], val_metrics['acc'],
                    val_metrics['precision'], val_metrics['recall'],
                    val_metrics['roc'], val_metrics[best_key]))

        best_f2 = val_metrics[best_key]

    if args.val:
        return

    model.train()

    if args.optim_name != 'BertAdam':
        if args.lrs == 'plateau':
            lr_scheduler.step(best_f2)
        else:
            lr_scheduler.step()
    train_iter = 0

    for epoch in range(args.start_epoch, args.num_epochs):
        #train_loader, val_loader = get_train_val_loaders(batch_size=args.batch_size, val_batch_size=args.val_batch_size, val_num=args.val_num)

        train_loss = 0

        current_lr = get_lrs(
            optimizer)  #optimizer.state_dict()['param_groups'][2]['lr']
        bg = time.time()
        for batch_idx, data in enumerate(train_loader):
            train_iter += 1
            img, target, target_aux, weights = data
            img, target, target_aux, weights = img.cuda(), target.cuda(), target_aux.cuda(), weights.cuda()

            output, output_aux = model(img)
            output = output.squeeze()

            loss = criterion(output, output_aux, target, target_aux, weights)
            #loss_aux = _reduce_loss(criterion(output_aux, target_aux.float()))

            batch_size = img.size(0)
            (batch_size * loss).backward()

            optimizer.step()
            optimizer.zero_grad()

            train_loss += loss.item()
            print('\r {:4d} | {:.7f} | {:06d}/{} | {:.4f} | {:.4f} |'.format(
                epoch, float(current_lr[0]), args.batch_size * (batch_idx + 1),
                train_loader.num, loss.item(), train_loss / (batch_idx + 1)),
                  end='')

            if train_iter > 0 and train_iter % args.iter_val == 0:
                if isinstance(model, DataParallel):
                    torch.save(model.module.state_dict(),
                               model_file + '_latest')
                else:
                    torch.save(model.state_dict(), model_file + '_latest')

                val_metrics = validate(args, model, val_loader)

                _save_ckp = ''
                if args.always_save or val_metrics[best_key] > best_f2:
                    best_f2 = val_metrics[best_key]
                    if isinstance(model, DataParallel):
                        torch.save(model.module.state_dict(), model_file)
                    else:
                        torch.save(model.state_dict(), model_file)
                    _save_ckp = '*'
                print(
                    ' {:.4f} | {:.4f} | {:.4f} | {:.4f} | {:.4f} | {:.4f} | {:.2f} |  {:4s} |'
                    .format(val_metrics['valid_loss'], val_metrics['acc'],
                            val_metrics['precision'], val_metrics['recall'],
                            val_metrics['roc'], best_f2,
                            (time.time() - bg) / 60, _save_ckp))

                model.train()

                if args.optim_name != 'BertAdam':
                    if args.lrs == 'plateau':
                        lr_scheduler.step(best_f2)
                    else:
                        lr_scheduler.step()
                current_lr = get_lrs(optimizer)
Example #14
def train(args):
    print('start training...')
    #model, model_file = create_single_class_model(args, num_classes=args.end_index-args.start_index)
    model, model_file = create_model(args)
    #model = model.cuda()

    if args.optim == 'Adam':
        optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=0.0001)
    else:
        optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=0.9, weight_decay=0.0001)

    if args.lrs == 'plateau':
        lr_scheduler = ReduceLROnPlateau(optimizer, mode='max', factor=args.factor, patience=args.patience, min_lr=args.min_lr)
    else:
        lr_scheduler = CosineAnnealingLR(optimizer, args.t_max, eta_min=args.min_lr)
    #ExponentialLR(optimizer, 0.9, last_epoch=-1) #CosineAnnealingLR(optimizer, 15, 1e-7) 

    _, f2_val_loader = get_train_val_loaders(args, batch_size=args.batch_size)
    tuning_loader = get_tuning_loader(args, batch_size=args.batch_size)
    #val_loader = get_val_loader(args, 0)

    best_cls_acc = 0.

    print('epoch |   lr    |   %        |  loss  |  avg   |  loss  |  cls   |  num   |  top1  | top10  |  best  |   f2   |  t f2  | time |  save  |')

    if not args.no_first_val:
        #best_cls_acc, top1_acc, total_loss, cls_loss, num_loss = f2_validate(args, model, f2_val_loader)#validate_avg(args, model, args.start_epoch)
        best_cls_acc, top1_acc, total_loss, cls_loss, num_loss = validate_avg(args, model, args.start_epoch)
        _, f2, _, _, _ = tr.validate(args, model, f2_val_loader, args.batch_size)
        _, tuning_f2, _, _, _ = tr.validate(args, model, tuning_loader, args.batch_size)
        print('val   |         |            |        |        | {:.4f} | {:.4f} | {:.4f} | {:.4f} | {:.4f} | {:.4f} | {:.4f} | {:.4f} |      |      |'.format(
            total_loss, cls_loss, num_loss, top1_acc, best_cls_acc, best_cls_acc, f2, tuning_f2))

    if args.val:
        return

    model.train()

    if args.lrs == 'plateau':
        lr_scheduler.step(best_cls_acc)
    else:
        lr_scheduler.step()
    train_iter = 0

    for epoch in range(args.start_epoch, args.epochs):
        train_loader = get_train_loader(args, batch_size=args.batch_size, dev_mode=args.dev_mode)

        train_loss = 0

        current_lr = get_lrs(optimizer)  #optimizer.state_dict()['param_groups'][2]['lr']
        bg = time.time()
        for batch_idx, data in enumerate(train_loader):
            train_iter += 1
            img, target, num_target = data
            img, target, num_target = img.cuda(), target.cuda(), num_target.cuda()
            optimizer.zero_grad()
            output, num_output = model(img)
            
            loss, _, _ = criterion(args, output, target, num_output, num_target, epoch)
            loss.backward()
 
            optimizer.step()

            train_loss += loss.item()
            print('\r {:4d} | {:.5f} | {:4d}/{} | {:.4f} | {:.4f} |'.format(
                epoch, float(current_lr[0]), args.batch_size*(batch_idx+1), train_loader.num, loss.item(), train_loss/(batch_idx+1)), end='')

            if train_iter > 0 and train_iter % args.iter_val == 0:
                #cls_acc, top1_acc, total_loss, cls_loss, num_loss = validate(args, model, f2_val_loader)
                cls_acc, top1_acc, total_loss, cls_loss, num_loss = validate_avg(args, model)
                _, f2, _, _, _ = tr.validate(args, model, f2_val_loader, args.batch_size)
                _, tuning_f2, _, _, _ = tr.validate(args, model, tuning_loader, args.batch_size)
                
                _save_ckp = ''
                if args.always_save or cls_acc > best_cls_acc:
                    best_cls_acc = cls_acc
                    torch.save(model.state_dict(), model_file)
                    _save_ckp = '*'
                print('  {:.4f} | {:.4f} | {:.4f} | {:.4f} | {:.4f} | {:.4f} | {:.4f} | {:.4f} | {:.2f} |  {:4s} |'.format(
                    total_loss, cls_loss, num_loss, top1_acc, cls_acc, best_cls_acc, f2, tuning_f2,
                    (time.time() - bg) / 60, _save_ckp))


                model.train()
                
                if args.lrs == 'plateau':
                    lr_scheduler.step(cls_acc)
                else:
                    lr_scheduler.step()
                current_lr = get_lrs(optimizer)