pass try: history['GLOVE'] = CONFIG['GLOVE'].copy() except KeyError: pass try: del CONFIG['EMBEDDING_OPTIONS']['input_dim'] except KeyError: pass try: del CONFIG['EMBEDDING_OPTIONS']['input_length'] except KeyError: pass try: del CONFIG['GLOVE']['VOCAB_COVERAGE'] except KeyError: pass try: del CONFIG['GLOVE']['TEXT_COVERAGE'] except KeyError: pass CONFIG['KFOLD_HISTORY'].append(history) log_model(CONFIG)
# Evaluate on the held-out test split after every epoch via a Keras callback.
eval_callback = TestDataCallback(x_test=x_test, y_test=y_test)

# Drop any model left over from a previous fold before building a new one,
# and force a collection so the backing memory is actually reclaimed.
model = None
gc.collect()
model = Keras.get_bert_model(
    bert_layer=bert_layer,
    input_length=INPUT_LENGTH,
    optimizer=MODEL['OPTIMIZER'],
    learning_rate=MODEL['LEARNING_RATE'],
)

# Same release-then-rebuild dance for the previous fold's history object.
history = None
gc.collect()
history = model.fit(
    x_train,
    y_train,
    epochs=MODEL['EPOCHS'],
    batch_size=MODEL['BATCH_SIZE'],
    verbose=1,
    validation_data=(x_val, y_val),
    callbacks=[eval_callback],
)
gc.collect()

# Fold the per-epoch test metrics collected by the callback into a shallow
# copy of the Keras history, then record this fold's results.
fold_history = dict(history.history)
fold_history['test_loss'] = eval_callback.loss
fold_history['test_accuracy'] = eval_callback.accuracy
MODEL['KFOLD_HISTORY'].append(fold_history)
log_model(MODEL)
def main(**kwargs):
    """Train a keypoint-detection model for one clothing category.

    Steps: apply CLI overrides to the global ``opt`` config, build the
    network, optionally resume from a checkpoint, set up train/val data
    loaders, then run the train/validate loop with LR scheduling,
    periodic checkpointing and early stopping.

    Args:
        **kwargs: command-line overrides forwarded to ``opt._parse``.
    """
    # 1. Parse command line arguments.
    opt._parse(kwargs)

    # 2. Visdom
    # vis = Visualizer(env=opt.env)

    # 3. GPU settings
    # n_gpu = utils.set_gpu('0,1')

    # 4. Configure model
    logging.info('==> Training model for clothing type: {}'.format(opt.category))
    cudnn.benchmark = True
    net = getattr(models, opt.model)(opt)

    # 5. Initialize logger
    # BUG FIX: was `timm.localtime()` — wall-clock time comes from the stdlib
    # `time` module; the `timm` package has no such function.
    cur_time = time.strftime('%Y-%m-%dT%H:%M:%S', time.localtime())
    initialize_logger(f'{opt.category}_{opt.model}_{cur_time}')

    # 6. Initialize checkpoints directory
    lr = opt.lr
    start_epoch = 1
    best_val_loss = float('inf')

    if opt.load_checkpoint_path:
        logging.info('==> Resuming from checkpoint...')
        checkpoint = torch.load(opt.load_checkpoint_path)
        start_epoch = checkpoint['epoch'] + 1
        lr = checkpoint['lr']
        best_val_loss = checkpoint['best_val_loss']
        net.load_state_dict(checkpoint['state_dict'])

    # 7. Data setup
    train_dataset = FashionAIKeypoints(opt, phase='train')
    logging.info('Train sample number: {}'.format(len(train_dataset)))
    train_loader = DataLoader(train_dataset,
                              batch_size=opt.batch_size,
                              shuffle=True,
                              num_workers=opt.num_workers,
                              collate_fn=train_dataset.collate_fn,
                              pin_memory=True)

    val_dataset = FashionAIKeypoints(opt, phase='val')
    logging.info('Val sample number: {}'.format(len(val_dataset)))
    val_loader = DataLoader(val_dataset,
                            batch_size=opt.batch_size,
                            shuffle=False,
                            num_workers=opt.num_workers,
                            collate_fn=val_dataset.collate_fn,
                            pin_memory=True)

    net = net.cuda()
    # net = DataParallel(net)
    loss = CPNLoss()
    loss = loss.cuda()

    # 8. Loss, optimizer and LR scheduler
    optimizer = torch.optim.SGD(net.parameters(), lr, momentum=0.9, weight_decay=1e-4)
    lrs = LRScheduler(lr, patience=3, factor=0.1, min_lr=0.01 * lr, best_loss=best_val_loss)

    # 9. Training loop
    for epoch in range(start_epoch, opt.max_epochs + 1):
        # Training
        logging.info("Start training loop...")
        train_metrics, train_time = train(train_loader, net, loss, optimizer, lr)

        # Validating
        logging.info("Start validating loop...")
        with torch.no_grad():
            val_metrics, val_time = validate(val_loader, net, loss)

        log_model(epoch, lr, train_metrics, train_time, val_metrics, val_time)

        # First column of val_metrics is assumed to hold the loss — TODO confirm
        # against `validate`'s return layout.
        val_loss = np.mean(val_metrics[:, 0])
        # Scheduler returns the next LR, or None to request early stopping.
        lr = lrs.update_by_rule(val_loss)

        # Save checkpoints: on improvement, every 10 epochs, and on early stop.
        if val_loss < best_val_loss or epoch % 10 == 0 or lr is None:
            if val_loss < best_val_loss:
                best_val_loss = val_loss

            # BUG FIX: `net.module` only exists when the net is wrapped in
            # DataParallel (commented out above); fall back to the bare net so
            # saving works in both configurations.
            module = net.module if hasattr(net, 'module') else net
            state_dict = module.state_dict()
            # Move tensors to CPU so the checkpoint loads on CPU-only hosts.
            for key in state_dict.keys():
                state_dict[key] = state_dict[key].cpu()

            torch.save(
                {
                    'epoch': epoch,
                    'save_dir': opt.checkpoint_path,
                    'state_dict': state_dict,
                    'lr': lr,
                    'best_val_loss': best_val_loss
                },
                opt.checkpoint_path / 'kpt_{}_{:03d}.ckpt'.format(opt.category, epoch))

        if lr is None:
            logging.info('Training is early-stopped')
            break