Пример #1
0
                pass

            # Snapshot the GloVe settings into this run's history record.
            # All of the cleanup below is deliberately best-effort: absent
            # keys are skipped via KeyError (EAFP), never treated as errors.
            try:
                history['GLOVE'] = CONFIG['GLOVE'].copy()
            except KeyError:
                pass

            # Strip embedding dimensions from the shared CONFIG — presumably
            # these are derived per run and must not leak into the next fold.
            # TODO(review): confirm against where EMBEDDING_OPTIONS is built.
            try:
                del CONFIG['EMBEDDING_OPTIONS']['input_dim']
            except KeyError:
                pass

            try:
                del CONFIG['EMBEDDING_OPTIONS']['input_length']
            except KeyError:
                pass

            # Likewise drop the GloVe coverage stats — presumably computed
            # per run from the current vocabulary; verify in the GloVe setup.
            try:
                del CONFIG['GLOVE']['VOCAB_COVERAGE']
            except KeyError:
                pass

            try:
                del CONFIG['GLOVE']['TEXT_COVERAGE']
            except KeyError:
                pass

            # Record this run's history and log the (cleaned) configuration.
            CONFIG['KFOLD_HISTORY'].append(history)

            log_model(CONFIG)
Пример #2
0
        # NOTE(review): TestDataCallback presumably evaluates the model on
        # (x_test, y_test) during fit() — its .loss / .accuracy attributes
        # are read after training below; confirm in its definition.
        test_data_callback = TestDataCallback(x_test=x_test, y_test=y_test)

        # Drop any model left over from a previous iteration and force a GC
        # pass to release its memory before building a fresh one.
        model = None
        gc.collect()

        model = Keras.get_bert_model(bert_layer=bert_layer,
                                     input_length=INPUT_LENGTH,
                                     optimizer=MODEL['OPTIMIZER'],
                                     learning_rate=MODEL['LEARNING_RATE'])

        # Same drop-and-collect pattern for the previous history object.
        history = None
        gc.collect()

        history = model.fit(x_train,
                            y_train,
                            epochs=MODEL['EPOCHS'],
                            batch_size=MODEL['BATCH_SIZE'],
                            verbose=1,
                            validation_data=(x_val, y_val),
                            callbacks=[test_data_callback])

        gc.collect()

        # Augment the Keras training history with the test metrics captured
        # by the callback, then append it to the per-fold history and log.
        model_history = history.history.copy()
        model_history['test_loss'] = test_data_callback.loss
        model_history['test_accuracy'] = test_data_callback.accuracy

        MODEL['KFOLD_HISTORY'].append(model_history)

        log_model(MODEL)
def main(**kwargs):
    """Train a keypoint model for one clothing category.

    Parses CLI overrides into ``opt``, builds the network, data loaders,
    loss, optimizer and LR scheduler, optionally resumes from a checkpoint,
    then runs the train/validate loop — saving a checkpoint on improvement,
    every 10th epoch, and on early stop — until ``opt.max_epochs`` is
    reached or the scheduler signals early stopping by returning ``None``.

    Args:
        **kwargs: command-line overrides applied via ``opt._parse``.
    """
    # 1. Parse command line arguments.
    opt._parse(kwargs)

    # 2. Visdom
    # vis = Visualizer(env=opt.env)

    # 3. GPU settings
    # n_gpu = utils.set_gpu('0,1')

    # 4. Configure model
    logging.info('==> Training model for clothing type: {}'.format(opt.category))
    cudnn.benchmark = True
    net = getattr(models, opt.model)(opt)

    # 5. Initialize logger
    # BUG FIX: was ``timm.localtime()`` — the timestamp must come from the
    # stdlib ``time`` module (already used for ``strftime`` on this line);
    # ``timm`` has no ``localtime`` and would raise AttributeError.
    cur_time = time.strftime('%Y-%m-%dT%H:%M:%S', time.localtime())
    initialize_logger(f'{opt.category}_{opt.model}_{cur_time}')

    # 6. Initialize checkpoints directory
    lr = opt.lr
    start_epoch = 1
    best_val_loss = float('inf')

    if opt.load_checkpoint_path:
        logging.info('==> Resuming from checkpoint...')
        checkpoint = torch.load(opt.load_checkpoint_path)
        start_epoch = checkpoint['epoch'] + 1
        lr = checkpoint['lr']
        best_val_loss = checkpoint['best_val_loss']
        net.load_state_dict(checkpoint['state_dict'])

    # 7. Data setup
    train_dataset = FashionAIKeypoints(opt, phase='train')
    logging.info('Train sample number: {}'.format(len(train_dataset)))
    train_loader = DataLoader(train_dataset,
                              batch_size=opt.batch_size,
                              shuffle=True,
                              num_workers=opt.num_workers,
                              collate_fn=train_dataset.collate_fn,
                              pin_memory=True)

    val_dataset = FashionAIKeypoints(opt, phase='val')
    logging.info('Val sample number: {}'.format(len(val_dataset)))
    val_loader = DataLoader(val_dataset,
                            batch_size=opt.batch_size,
                            shuffle=False,
                            num_workers=opt.num_workers,
                            collate_fn=val_dataset.collate_fn,
                            pin_memory=True)

    net = net.cuda()
    # net = DataParallel(net)
    loss = CPNLoss()
    loss = loss.cuda()

    # 8. Loss, optimizer and LR scheduler
    optimizer = torch.optim.SGD(net.parameters(),
                                lr,
                                momentum=0.9,
                                weight_decay=1e-4)
    lrs = LRScheduler(lr,
                      patience=3,
                      factor=0.1,
                      min_lr=0.01 * lr,
                      best_loss=best_val_loss)

    # 9. Training loop
    for epoch in range(start_epoch, opt.max_epochs + 1):
        # Training
        logging.info("Start training loop...")
        # NOTE(review): ``lr`` is passed to train() but the optimizer's own
        # param-group lr is never updated after lrs.update_by_rule — confirm
        # train() applies ``lr`` to the optimizer, otherwise decay is a no-op.
        train_metrics, train_time = train(train_loader, net, loss, optimizer,
                                          lr)

        # Validating
        logging.info("Start validating loop...")
        with torch.no_grad():
            val_metrics, val_time = validate(val_loader, net, loss)

        log_model(epoch, lr, train_metrics, train_time, val_metrics, val_time)

        # Column 0 of val_metrics is averaged as the scheduler's signal;
        # update_by_rule returns None to request early stopping.
        val_loss = np.mean(val_metrics[:, 0])
        lr = lrs.update_by_rule(val_loss)

        # Save checkpoints on improvement, every 10th epoch, and on early stop.
        if val_loss < best_val_loss or epoch % 10 == 0 or lr is None:
            if val_loss < best_val_loss:
                best_val_loss = val_loss

            # BUG FIX: ``DataParallel`` wrapping is commented out above, so
            # ``net`` has no ``.module`` attribute and the unconditional
            # ``net.module.state_dict()`` raised AttributeError. Support both
            # wrapped and bare models.
            raw_net = net.module if hasattr(net, 'module') else net
            state_dict = raw_net.state_dict()

            # Move all tensors to CPU so the checkpoint loads on any device.
            for key in state_dict.keys():
                state_dict[key] = state_dict[key].cpu()

            torch.save(
                {
                    'epoch': epoch,
                    'save_dir': opt.checkpoint_path,
                    'state_dict': state_dict,
                    'lr': lr,
                    'best_val_loss': best_val_loss
                }, opt.checkpoint_path /
                'kpt_{}_{:03d}.ckpt'.format(opt.category, epoch))

        if lr is None:
            logging.info('Training is early-stopped')
            break