Example #1
        def model_train_pred(fold, seed):

            seed_everything(seed)
            model_path = os.path.join(
                model_dir, model_file_name + f"_SEED{seed}_FOLD{fold}.pth")
            trn_idx = df_train[df_train['fold'] != fold].index
            val_idx = df_train[df_train['fold'] == fold].index

            x_fold_train = df_train_x.loc[trn_idx].reset_index(
                drop=True).copy()
            y_fold_train = df_train_y.loc[trn_idx].reset_index(
                drop=True).copy()

            x_fold_val = df_train_x.loc[val_idx].reset_index(drop=True).copy()
            y_fold_val = df_train_y.loc[val_idx].reset_index(drop=True).copy()
            df_test_x_copy = df_test_x.copy()
            x_fold_train, x_fold_val, df_test_x_copy = normalize(
                x_fold_train, x_fold_val, df_test_x_copy)

            train_dataset = TrainDataset(x_fold_train.values,
                                         y_fold_train.values)
            valid_dataset = TrainDataset(x_fold_val.values, y_fold_val.values)
            trainloader = torch.utils.data.DataLoader(
                train_dataset, batch_size=self.BATCH_SIZE, shuffle=True)
            validloader = torch.utils.data.DataLoader(
                valid_dataset, batch_size=self.BATCH_SIZE, shuffle=False)

            model = SimpleNN_Model(num_features=num_features,
                                   num_targets=num_targets,
                                   hidden_size=hidden_size)
            model.to(DEVICE)

            optimizer = torch.optim.Adam(model.parameters(),
                                         weight_decay=WEIGHT_DECAY,
                                         lr=self.LEARNING_RATE)
            scheduler = optim.lr_scheduler.OneCycleLR(
                optimizer=optimizer,
                pct_start=0.2,
                div_factor=1e3,
                max_lr=1e-2,
                epochs=self.EPOCHS,
                steps_per_epoch=len(trainloader))
            loss_train = SmoothBCEwLogits(smoothing=0.001)
            loss_val = nn.BCEWithLogitsLoss()
            early_stopping_steps = EARLY_STOPPING_STEPS
            early_step = 0

            oof = np.zeros(df_train_y.shape)
            best_loss = np.inf
            best_loss_epoch = -1

            if IS_TRAIN:
                for epoch in range(self.EPOCHS):
                    train_loss = train_fn(model, optimizer, scheduler,
                                          loss_train, trainloader, DEVICE)
                    valid_loss, valid_preds = valid_fn(model, loss_val,
                                                       validloader, DEVICE)
                    if valid_loss < best_loss:
                        best_loss = valid_loss
                        best_loss_epoch = epoch
                        oof[val_idx] = valid_preds
                        torch.save(model.state_dict(), model_path)
                    elif EARLY_STOP:
                        early_step += 1
                        if early_step >= early_stopping_steps:
                            break
                    if epoch % 10 == 0 or epoch == self.EPOCHS - 1:
                        print(f"seed: {seed}, FOLD: {fold}, EPOCH: {epoch},\
                        train_loss: {train_loss:.6f}, valid_loss: {valid_loss:.6f}, best_loss: {best_loss:.6f},\
                        best_loss_epoch: {best_loss_epoch}")

            # --------------------- PREDICTION ---------------------
            testdataset = TestDataset(df_test_x_copy.values)
            testloader = torch.utils.data.DataLoader(
                testdataset, batch_size=self.BATCH_SIZE, shuffle=False)
            model = SimpleNN_Model(num_features=num_features,
                                   num_targets=num_targets,
                                   hidden_size=hidden_size)
            model.load_state_dict(torch.load(model_path))
            model.to(DEVICE)

            if not IS_TRAIN:
                valid_loss, valid_preds = valid_fn(model, loss_val,
                                                   validloader, DEVICE)
                oof[val_idx] = valid_preds
            predictions = inference_fn(model, testloader, DEVICE)
            return oof, predictions
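
The function above returns the fold's out-of-fold matrix and its test predictions. As a usage sketch (not part of the original listing: NFOLDS, SEEDS, df_train_y and df_test_y are assumed to exist in the enclosing scope), the per-run outputs are typically blended across seeds and folds like this:

import numpy as np

oof = np.zeros(df_train_y.shape)
predictions = np.zeros(df_test_y.shape)
for seed in SEEDS:
    for fold in range(NFOLDS):
        oof_, pred_ = model_train_pred(fold, seed)
        oof += oof_ / len(SEEDS)                      # each fold fills a disjoint set of rows
        predictions += pred_ / (NFOLDS * len(SEEDS))  # average over every (seed, fold) run

A second variant of model_train_pred follows: it takes the model class as a parameter, delegates the fold split and normalization to a preprocess() helper, and adds a pos_weight term to the smoothed training loss.
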
        def model_train_pred(fold,
                             Model=CNN_Model,
                             df_train_y=df_train_y,
                             df_test_y=df_test_y,
                             features=features,
                             file_name=model_file_name):

            model_path = os.path.join(model_dir,
                                      file_name + f"_FOLD{fold}.pth")
            (x_fold_train, y_fold_train, x_fold_val, y_fold_val,
             df_test_x_copy, val_idx) = preprocess(fold, df_train, df_train_x,
                                                   df_train_y, df_test_x,
                                                   no_of_components)
            train_dataset = TrainDataset(x_fold_train.values,
                                         y_fold_train.values)
            valid_dataset = TrainDataset(x_fold_val.values, y_fold_val.values)

            trainloader = torch.utils.data.DataLoader(
                train_dataset, batch_size=self.BATCH_SIZE, shuffle=True)
            validloader = torch.utils.data.DataLoader(
                valid_dataset, batch_size=self.BATCH_SIZE, shuffle=False)

            model = Model(num_features=num_features,
                          num_targets=num_targets,
                          hidden_size=hidden_size)
            model.to(DEVICE)
            optimizer = torch.optim.Adam(model.parameters(),
                                         lr=self.LEARNING_RATE,
                                         weight_decay=WEIGHT_DECAY,
                                         eps=1e-9)
            scheduler = optim.lr_scheduler.OneCycleLR(
                optimizer=optimizer,
                pct_start=0.1,
                div_factor=1e3,
                max_lr=1e-2,
                epochs=self.EPOCHS,
                steps_per_epoch=len(trainloader))
            loss_train = SmoothBCEwLogits(smoothing=0.001,
                                          pos_weight=pos_weight)
            loss_val = nn.BCEWithLogitsLoss()
            early_stopping_steps = EARLY_STOPPING_STEPS
            early_step = 0
            oof = np.zeros(df_train_y.shape)
            best_loss = np.inf
            best_loss_epoch = -1

            for epoch in range(self.EPOCHS):
                train_loss = train_fn(model, optimizer, scheduler, loss_train,
                                      trainloader, DEVICE)
                valid_loss, valid_preds = valid_fn(model, loss_val,
                                                   validloader, DEVICE)
                if valid_loss < best_loss:
                    best_loss = valid_loss
                    best_loss_epoch = epoch
                    oof[val_idx] = valid_preds
                    torch.save(model.state_dict(), model_path)
                elif EARLY_STOP:
                    early_step += 1
                    if early_step >= early_stopping_steps:
                        break
                print(f"FOLD: {fold}, EPOCH: {epoch}, "
                      f"train_loss: {train_loss:.6f}, "
                      f"valid_loss: {valid_loss:.6f}, "
                      f"best_loss: {best_loss:.6f}, "
                      f"best_loss_epoch: {best_loss_epoch}")

            # --------------------- PREDICTION ---------------------
            testdataset = TestDataset(df_test_x_copy.values)
            testloader = torch.utils.data.DataLoader(
                testdataset, batch_size=self.BATCH_SIZE, shuffle=False)
            model = Model(num_features=num_features,
                          num_targets=num_targets,
                          hidden_size=hidden_size)
            model.load_state_dict(torch.load(model_path))
            model.to(DEVICE)

            predictions = inference_fn(model, testloader, DEVICE)
            return oof, predictions
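
Both variants rely on SmoothBCEwLogits, which is not defined in this excerpt. A minimal sketch consistent with how it is called above (a smoothing factor plus an optional pos_weight) is a label-smoothing wrapper around BCE-with-logits; the class below is an assumed reconstruction, not the original implementation:

from torch import nn
import torch.nn.functional as F

class SmoothBCEwLogits(nn.Module):
    # Hypothetical reconstruction: shrink hard 0/1 targets toward 0.5 by the
    # smoothing factor, then apply the usual BCE-with-logits loss.
    def __init__(self, smoothing=0.0, pos_weight=None):
        super().__init__()
        self.smoothing = smoothing
        self.pos_weight = pos_weight

    def forward(self, inputs, targets):
        targets = targets * (1.0 - self.smoothing) + 0.5 * self.smoothing
        return F.binary_cross_entropy_with_logits(inputs, targets,
                                                  pos_weight=self.pos_weight)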