Example #1
# Imports assumed by this excerpt (its beginning is truncated in the
# original listing); X, y_binary, train, OUTPUTDIR, X_trn and X_vld are
# defined in earlier, truncated code.
import pandas as pd
from pytorch_tabnet.pretraining import TabNetPretrainer
from sklearn.model_selection import KFold

# Reconstructed constructor head; any other TabNetPretrainer arguments
# were lost to the truncation.
unsupervised_model = TabNetPretrainer(
    mask_type="sparsemax",  # masking function; "entmax" is the other option
)
max_epochs = 1000
unsupervised_model.fit(
    X_train=X_trn,
    eval_set=[X_vld],
    max_epochs=max_epochs,
    patience=10,
    batch_size=512,
    virtual_batch_size=64,
    num_workers=0,
    drop_last=False,
    pretraining_ratio=0.8,
)
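
# pretraining_ratio=0.8 masks 80% of the input features for the
# self-supervised reconstruction task; patience=10 stops training after
# 10 epochs without improvement on the eval_set.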

unsupervised_model.save_model(f"{OUTPUTDIR}/pretrain")
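
# Optional sanity check (not in the original excerpt, illustrative only):
# save_model writes "<path>.zip", so the artifact can be restored the same
# way as the per-fold models further down.
_check = TabNetPretrainer()
_check.load_model(f"{OUTPUTDIR}/pretrain.zip")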

oof_preds = []
test_preds = []
scores = []
importances = pd.DataFrame()
cv_repeat = 10
for i in range(cv_repeat):
    print(f"CV {i + 1}")
    oof = train.result_score.astype("float64").copy().values  # seeded with the target column; presumably overwritten with fold predictions
    kfold = KFold(n_splits=5, shuffle=True, random_state=i * 42)
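    # Repeated K-fold: 10 repeats x 5 folds = 50 pretrain/classifier fits,
    # giving 10 out-of-fold predictions per sample. random_state=i * 42
    # yields a different split each repeat (repeat 0 uses random_state=0).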
    _test_preds = []
    for fold, (trn_index, vld_index) in enumerate(kfold.split(X, y_binary)):
        X_trn, y_trn = X[trn_index], y_binary[trn_index]
        X_vld, y_vld = X[vld_index], y_binary[vld_index]
        print(f"Fold {fold}, data split")
Example #2
    # Reconstructed constructor head: this excerpt begins mid-way through a
    # cross-validation fold loop and assumes the imports from Example #1,
    # plus `import torch` and TabNetClassifier for the code below.
    unsupervised_model = TabNetPretrainer(
        mask_type="entmax",  # masking function; "sparsemax" is the other option
    )
    max_epochs = 1000
    unsupervised_model.fit(
        X_train=X_trn,
        eval_set=[X_vld],
        max_epochs=max_epochs,
        patience=25,
        batch_size=2048,
        virtual_batch_size=64,
        num_workers=0,
        drop_last=False,
        pretraining_ratio=0.8,
    )
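
    # batch_size=2048 with virtual_batch_size=64 means ghost batch norm
    # splits each training batch into 2048 / 64 = 32 virtual batches.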

    unsupervised_model.save_model(f"{OUTPUTDIR}/test_pretrain_fold{fold}")
    loaded_pretrain = TabNetPretrainer()
    loaded_pretrain.load_model(f"{OUTPUTDIR}/test_pretrain_fold{fold}.zip")

    clf = TabNetClassifier(
        optimizer_fn=torch.optim.Adam,
        optimizer_params=dict(lr=2e-1),
        scheduler_params={"gamma": 0.95},  # decay the learning rate by gamma after each epoch
        scheduler_fn=torch.optim.lr_scheduler.ExponentialLR,
        mask_type="sparsemax",  # overridden when warm-starting from the pretrained model
    )
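    # pytorch-tabnet applies pretrained weights at fit time via the
    # from_unsupervised argument, e.g. clf.fit(..., from_unsupervised=loaded_pretrain);
    # presumably that is how this (truncated) fit call uses loaded_pretrain,
    # and it is also what overrides mask_type above.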

    clf.fit(
        X_train=X_trn,
        y_train=y_trn,