def model_train_pred(fold, seed):
    """Train SimpleNN_Model on one CV fold and predict on the test set.

    Rows with ``df_train['fold'] != fold`` form the training split; the rest
    form the validation split. When ``IS_TRAIN`` is true the model is trained
    with early stopping and the best checkpoint (by validation loss) is saved;
    otherwise a previously saved checkpoint is loaded and only evaluated.

    NOTE(review): this function references ``self.BATCH_SIZE`` /
    ``self.LEARNING_RATE`` / ``self.EPOCHS`` despite a plain signature — it
    presumably closes over ``self`` from an enclosing method; confirm.

    Args:
        fold: fold index used to split train/validation rows.
        seed: RNG seed passed to ``seed_everything``; also part of the
            checkpoint filename.

    Returns:
        tuple ``(oof, predictions)``: out-of-fold predictions (filled only at
        ``val_idx``) and test-set predictions.
    """
    seed_everything(seed)
    model_path = os.path.join(
        model_dir, model_file_name + f"_SEED{seed}_FOLD{fold}.pth")

    # Split train/validation by the precomputed fold column.
    trn_idx = df_train[df_train['fold'] != fold].index
    val_idx = df_train[df_train['fold'] == fold].index
    x_fold_train = df_train_x.loc[trn_idx].reset_index(drop=True).copy()
    y_fold_train = df_train_y.loc[trn_idx].reset_index(drop=True).copy()
    x_fold_val = df_train_x.loc[val_idx].reset_index(drop=True).copy()
    y_fold_val = df_train_y.loc[val_idx].reset_index(drop=True).copy()
    df_test_x_copy = df_test_x.copy()

    x_fold_train, x_fold_val, df_test_x_copy = normalize(
        x_fold_train, x_fold_val, df_test_x_copy)

    train_dataset = TrainDataset(x_fold_train.values, y_fold_train.values)
    valid_dataset = TrainDataset(x_fold_val.values, y_fold_val.values)
    trainloader = torch.utils.data.DataLoader(
        train_dataset, batch_size=self.BATCH_SIZE, shuffle=True)
    validloader = torch.utils.data.DataLoader(
        valid_dataset, batch_size=self.BATCH_SIZE, shuffle=False)

    model = SimpleNN_Model(num_features=num_features,
                           num_targets=num_targets,
                           hidden_size=hidden_size)
    model.to(DEVICE)

    optimizer = torch.optim.Adam(model.parameters(),
                                 weight_decay=WEIGHT_DECAY,
                                 lr=self.LEARNING_RATE)
    scheduler = optim.lr_scheduler.OneCycleLR(
        optimizer=optimizer, pct_start=0.2, div_factor=1e3,
        max_lr=1e-2, epochs=self.EPOCHS,
        steps_per_epoch=len(trainloader))

    # Label smoothing on the training criterion only; plain BCE for validation.
    loss_train = SmoothBCEwLogits(smoothing=0.001)
    loss_val = nn.BCEWithLogitsLoss()

    early_stopping_steps = EARLY_STOPPING_STEPS
    early_step = 0

    oof = np.zeros(df_train_y.shape)
    best_loss = np.inf
    best_loss_epoch = -1

    if IS_TRAIN:
        for epoch in range(self.EPOCHS):
            train_loss = train_fn(model, optimizer, scheduler, loss_train,
                                  trainloader, DEVICE)
            valid_loss, valid_preds = valid_fn(model, loss_val, validloader,
                                               DEVICE)

            if valid_loss < best_loss:
                best_loss = valid_loss
                best_loss_epoch = epoch
                oof[val_idx] = valid_preds
                torch.save(model.state_dict(), model_path)
                # BUGFIX: reset the patience counter on improvement so early
                # stopping triggers on *consecutive* non-improving epochs
                # (previously it counted non-improving epochs cumulatively).
                early_step = 0
            elif EARLY_STOP:
                early_step += 1
                if early_step >= early_stopping_steps:
                    break

            if epoch % 10 == 0 or epoch == self.EPOCHS - 1:
                print(f"seed: {seed}, FOLD: {fold}, EPOCH: {epoch}, "
                      f"train_loss: {train_loss:.6f}, "
                      f"valid_loss: {valid_loss:.6f}, "
                      f"best_loss: {best_loss:.6f}, "
                      f"best_loss_epoch: {best_loss_epoch}")

    # --------------------- PREDICTION ---------------------
    testdataset = TestDataset(df_test_x_copy.values)
    testloader = torch.utils.data.DataLoader(
        testdataset, batch_size=self.BATCH_SIZE, shuffle=False)

    # Reload the best checkpoint (from this run, or a previous run when
    # IS_TRAIN is false).
    model = SimpleNN_Model(num_features=num_features,
                           num_targets=num_targets,
                           hidden_size=hidden_size)
    model.load_state_dict(torch.load(model_path))
    model.to(DEVICE)

    if not IS_TRAIN:
        # BUGFIX: original referenced the undefined name `loss_fn` here,
        # raising NameError on inference-only runs; use the validation
        # criterion defined above.
        valid_loss, valid_preds = valid_fn(model, loss_val, validloader,
                                           DEVICE)
        oof[val_idx] = valid_preds

    predictions = inference_fn(model, testloader, DEVICE)
    return oof, predictions
def model_train_pred(fold, Model=CNN_Model, df_train_y=df_train_y,
                     df_test_y=df_test_y, features=features,
                     file_name=model_file_name):
    """Train ``Model`` on one CV fold and predict on the test set.

    Splitting/normalization is delegated to ``preprocess``. Always trains
    (no ``IS_TRAIN`` gate, unlike the seeded variant above), saving the best
    checkpoint by validation loss, then reloads it for test inference.

    NOTE(review): references ``self.BATCH_SIZE`` / ``self.LEARNING_RATE`` /
    ``self.EPOCHS`` despite a plain signature — presumably closes over
    ``self`` from an enclosing method; confirm.

    Args:
        fold: fold index used by ``preprocess`` to split the data.
        Model: model class to instantiate (default ``CNN_Model``).
        df_train_y: training targets; fixes the shape of ``oof``.
        df_test_y: test targets; only documents the prediction shape.
        features: accepted for interface compatibility; not used here.
        file_name: checkpoint filename stem.

    Returns:
        tuple ``(oof, predictions)``: out-of-fold predictions (filled only at
        the fold's validation indices) and test-set predictions.
    """
    model_path = os.path.join(model_dir, file_name + f"_FOLD{fold}.pth")

    x_fold_train, y_fold_train, x_fold_val, y_fold_val, df_test_x_copy, val_idx = \
        preprocess(fold, df_train, df_train_x, df_train_y, df_test_x,
                   no_of_components)

    train_dataset = TrainDataset(x_fold_train.values, y_fold_train.values)
    valid_dataset = TrainDataset(x_fold_val.values, y_fold_val.values)
    trainloader = torch.utils.data.DataLoader(
        train_dataset, batch_size=self.BATCH_SIZE, shuffle=True)
    validloader = torch.utils.data.DataLoader(
        valid_dataset, batch_size=self.BATCH_SIZE, shuffle=False)

    model = Model(num_features=num_features,
                  num_targets=num_targets,
                  hidden_size=hidden_size)
    model.to(DEVICE)

    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=self.LEARNING_RATE,
                                 weight_decay=WEIGHT_DECAY,
                                 eps=1e-9)
    scheduler = optim.lr_scheduler.OneCycleLR(
        optimizer=optimizer, pct_start=0.1, div_factor=1e3,
        max_lr=1e-2, epochs=self.EPOCHS,
        steps_per_epoch=len(trainloader))

    # Label smoothing + positive-class weighting on the training criterion;
    # plain BCE for validation.
    loss_train = SmoothBCEwLogits(smoothing=0.001, pos_weight=pos_weight)
    loss_val = nn.BCEWithLogitsLoss()

    early_stopping_steps = EARLY_STOPPING_STEPS
    early_step = 0

    oof = np.zeros(df_train_y.shape)
    best_loss = np.inf
    best_loss_epoch = -1

    for epoch in range(self.EPOCHS):
        train_loss = train_fn(model, optimizer, scheduler, loss_train,
                              trainloader, DEVICE)
        valid_loss, valid_preds = valid_fn(model, loss_val, validloader,
                                           DEVICE)

        if valid_loss < best_loss:
            best_loss = valid_loss
            best_loss_epoch = epoch
            oof[val_idx] = valid_preds
            torch.save(model.state_dict(), model_path)
            # BUGFIX: reset the patience counter on improvement so early
            # stopping triggers on *consecutive* non-improving epochs
            # (previously it counted non-improving epochs cumulatively).
            early_step = 0
        elif EARLY_STOP:
            early_step += 1
            if early_step >= early_stopping_steps:
                break

        print(f"FOLD: {fold}, EPOCH: {epoch}, "
              f"train_loss: {train_loss:.6f}, "
              f"valid_loss: {valid_loss:.6f}, "
              f"best_loss: {best_loss:.6f}, "
              f"best_loss_epoch: {best_loss_epoch}")

    # --------------------- PREDICTION ---------------------
    testdataset = TestDataset(df_test_x_copy.values)
    testloader = torch.utils.data.DataLoader(
        testdataset, batch_size=self.BATCH_SIZE, shuffle=False)

    # Reload the best checkpoint for inference.
    model = Model(num_features=num_features,
                  num_targets=num_targets,
                  hidden_size=hidden_size)
    model.load_state_dict(torch.load(model_path))
    model.to(DEVICE)

    predictions = inference_fn(model, testloader, DEVICE)
    return oof, predictions