def test_save_load(
    regression_data,
    model_config_class,
    continuous_cols,
    categorical_cols,
    custom_metrics,
    custom_loss,
    custom_optimizer,
    tmpdir,
):
    """Check that a saved and reloaded model evaluates to the same metric.

    A regression model is fitted and evaluated on ``test``, written to a
    ``saved_model`` directory created under ``tmpdir``, loaded back with
    ``TabularModel.load_from_checkpoint`` and evaluated again. The value of
    the first metric must be equal in both evaluation results.
    """
    train, test, target = regression_data
    data_config = DataConfig(
        target=target,
        continuous_cols=continuous_cols,
        categorical_cols=categorical_cols,
    )

    # The fixture yields a (config class, constructor kwargs) pair.
    # NOTE(review): the kwargs dict is modified in place here - confirm the
    # fixture does not share that dict between tests.
    config_cls, config_kwargs = model_config_class
    config_kwargs["task"] = "regression"
    model_config = config_cls(**config_kwargs)

    trainer_config = TrainerConfig(
        max_epochs=3,
        checkpoints=None,
        early_stopping=None,
        gpus=None,
        fast_dev_run=True,
    )
    optimizer_config = OptimizerConfig()

    tabular_model = TabularModel(
        data_config=data_config,
        model_config=model_config,
        optimizer_config=optimizer_config,
        trainer_config=trainer_config,
    )
    tabular_model.fit(
        train=train,
        test=test,
        metrics=custom_metrics,
        loss=custom_loss,
        optimizer=custom_optimizer,
        optimizer_params={},
    )
    metrics_before = tabular_model.evaluate(test)

    # Round-trip the model through a directory on disk.
    save_dir = str(tmpdir.mkdir("saved_model"))
    tabular_model.save_model(save_dir)
    reloaded = TabularModel.load_from_checkpoint(save_dir)
    metrics_after = reloaded.evaluate(test)

    # Each model names its own first metric; results are keyed "test_<metric>".
    key_before = f"test_{tabular_model.model.hparams.metrics[0]}"
    key_after = f"test_{reloaded.model.hparams.metrics[0]}"
    assert metrics_before[0][key_before] == metrics_after[0][key_after]
def main():
    """Train, evaluate and save a category-embedding classifier.

    Builds a synthetic mixed-type classification dataset, splits it into
    train / validation / test parts, fits a ``TabularModel`` on it, prints
    metrics for the test predictions and saves the model to
    ``Analysis/basic``.
    """
    # Synthetic data, then two successive splits: (train, test) and (train, val).
    data, cat_col_names, num_col_names = make_mixed_classification(
        n_samples=10000,
        n_features=20,
        n_categories=4,
    )
    train, test = train_test_split(data, random_state=42)
    train, val = train_test_split(train, random_state=42)

    # All four configuration objects, collected under the keyword names
    # that TabularModel is called with.
    configs = {
        "data_config": DataConfig(
            target=["target"],
            continuous_cols=num_col_names,
            categorical_cols=cat_col_names,
        ),
        "trainer_config": TrainerConfig(
            auto_lr_find=True,
            batch_size=1024,
            max_epochs=100,
            gpus=1,
        ),
        "optimizer_config": OptimizerConfig(),
        "model_config": CategoryEmbeddingModelConfig(
            task="classification",
            layers="1024-512-512",
            activation="LeakyReLU",
            learning_rate=1e-3,
        ),
    }
    tabular_model = TabularModel(**configs)

    # Fit on the training split, validating on the validation split.
    tabular_model.fit(train=train, validation=val)

    # Evaluate on the held-out split; the returned result is not used here.
    tabular_model.evaluate(test)

    # Predictions for the held-out split as a DataFrame.
    pred_df = tabular_model.predict(test)
    pred_df.head()  # return value is discarded

    print_metrics(test['target'], pred_df["prediction"], tag="Holdout")

    # Persist the trained model.
    tabular_model.save_model("Analysis/basic")
# Example #3
# Flat example script: configure, train, evaluate, save and reload a model.
# NOTE(review): train, val, test, num_col_names and cat_col_names are not
# defined in this snippet - they must exist before it runs.
# NOTE(review): the opening of the DataConfig call was missing from the
# snippet; it is restored here so that `data_config`, used below, is
# defined - confirm against the original example.
data_config = DataConfig(
    # target should always be a list. Multi-targets are only supported for
    # regression. Multi-Task Classification is not implemented
    target=['target'],
    continuous_cols=num_col_names,
    categorical_cols=cat_col_names,
)
trainer_config = TrainerConfig(
    auto_lr_find=True,  # Runs the LRFinder to automatically derive a learning rate
    batch_size=1024,
    max_epochs=100,
    gpus=1,  # index of the GPU to use. 0, means CPU
)
optimizer_config = OptimizerConfig()

model_config = CategoryEmbeddingModelConfig(
    task="classification",
    layers="1024-512-512",  # Number of nodes in each layer
    activation="LeakyReLU",  # Activation between each layers
    learning_rate=1e-3,
)

tabular_model = TabularModel(
    data_config=data_config,
    model_config=model_config,
    optimizer_config=optimizer_config,
    trainer_config=trainer_config,
)
# Train, evaluate on the test split, predict, then save and reload the model.
tabular_model.fit(train=train, validation=val)
result = tabular_model.evaluate(test)
pred_df = tabular_model.predict(test)
tabular_model.save_model("Analysis/basic")
loaded_model = TabularModel.load_from_checkpoint("Analysis/basic")
def main_64():
    """Cross-validate a TabNet classifier and write a submission file.

    Loads the data with ``data_load()``, runs a 10-fold stratified
    cross-validation, and in every fold trains a fresh ``TabularModel`` with a
    class-weighted cross-entropy loss, saves it to
    ``Analysis/basic_tabnet_rep<fold index>`` and predicts both the fold's
    held-out rows and ``test_data``. The held-out predictions of all folds
    are scored together; the per-fold ``"0_probability"`` columns for
    ``test_data`` are averaged, thresholded at 0.5 and written to
    ``Analysis/submission_2.csv``.
    """
    # The per-fold training frame is bound to the module-level name `train`.
    global train
    data, test_data, cat_col_names, num_col_names = data_load()
    batch_size = 2500 * 3 * 2 * 2

    # Configuration objects, keyed by the keyword names TabularModel takes.
    configs = {
        "data_config": DataConfig(
            target=["target"],
            continuous_cols=num_col_names,
            categorical_cols=cat_col_names,
            num_workers=4,
        ),
        "trainer_config": TrainerConfig(
            auto_lr_find=True,
            batch_size=batch_size,
            max_epochs=100,
            gpus=1,
        ),
        "optimizer_config": OptimizerConfig(),
        "model_config": TabNetModelConfig(
            task="classification",
            # Learning rate grows linearly with the batch size (base 1e-3 at 1024).
            learning_rate=1e-3 * batch_size / 1024,
            n_d=64,
            n_a=64,
            n_steps=5,
            gamma=1.3,
        ),
    }

    folds = StratifiedKFold(n_splits=10, shuffle=True)

    holdout_preds = []
    test_preds = []
    for i, (train_idx, test_idx) in enumerate(folds.split(X=data, y=data.target.values)):
        train = data.iloc[train_idx]
        test = data.iloc[test_idx]
        train, val = train_test_split(train, random_state=42)

        # A fresh model per fold, with a loss built from this fold's labels.
        tabular_model = TabularModel(**configs)
        weighted_loss = get_class_weighted_cross_entropy(train["target"].values.ravel(), mu=0.1)
        tabular_model.fit(train=train, validation=val, max_epochs=100, loss=weighted_loss)

        # Keep only the predicted label for the fold's held-out rows.
        holdout_preds.append(tabular_model.predict(test).loc[:, ["prediction"]])
        tabular_model.save_model(f"Analysis/basic_tabnet_rep{i}")

        # Full prediction frame for the separate test set.
        test_preds.append(tabular_model.predict(test_data))

    # Held-out predictions of all folds, restored to the original row order.
    pred_tot = pd.concat(holdout_preds).sort_index()

    print_metrics(data['target'], pred_tot["prediction"], tag="Holdout")

    # Row-wise mean of the per-fold "0_probability" columns.
    prob_frames = [fold_pred.loc[:, ["0_probability"]] for fold_pred in test_preds]
    mean_prob = pd.concat(prob_frames, axis=1).apply(np.mean, axis=1)
    # NOTE(review): the column name suggests the probability of class 0, yet
    # values above 0.5 are mapped to label 1 - confirm this is intended.
    labels = mean_prob.map(lambda p: 1 if p > 0.5 else 0)

    submission = pd.read_csv("Data/sample_submission.csv")
    submission["target"] = labels.values

    submission.to_csv("Analysis/submission_2.csv", index=False)

    print(confusion_matrix(data['target'], pred_tot["prediction"]))
def main():
    """Cross-validate a category-embedding classifier and save the last model.

    Loads the data with ``data_load()``, runs a 10-fold stratified
    cross-validation in which every fold trains a fresh ``TabularModel`` with
    a class-weighted cross-entropy loss, gathers the held-out predictions of
    all folds, prints metrics and the confusion matrix for them, and finally
    saves the model trained in the last fold to ``Analysis/basic``.
    """
    # Load the dataset together with its categorical / continuous column names.
    data, cat_col_names, num_col_names = data_load()
    bsize = 1024

    # ########## Define the configs ############
    data_config = DataConfig(target=["target"],
                             continuous_cols=num_col_names,
                             categorical_cols=cat_col_names)
    trainer_config = TrainerConfig(auto_lr_find=True,
                                   batch_size=bsize,
                                   max_epochs=100,
                                   gpus=1)
    optimizer_config = OptimizerConfig()

    model_config = CategoryEmbeddingModelConfig(task="classification",
                                                layers="1024-512-512",
                                                activation="LeakyReLU",
                                                learning_rate=1e-3)

    # NOTE(review): this instance is replaced by a fresh one in every fold
    # below and looks redundant - confirm before removing.
    tabular_model = TabularModel(data_config=data_config,
                                 model_config=model_config,
                                 optimizer_config=optimizer_config,
                                 trainer_config=trainer_config)

    cv = StratifiedKFold(n_splits=10, shuffle=True)

    res_pred = []
    for train_idx, test_idx in cv.split(X=data, y=data.target.values):
        train, test = data.iloc[train_idx], data.iloc[test_idx]
        train, val = train_test_split(train, random_state=42)

        # A fresh model per fold.
        tabular_model = TabularModel(data_config=data_config,
                                     model_config=model_config,
                                     optimizer_config=optimizer_config,
                                     trainer_config=trainer_config)

        # Loss built from this fold's training labels.
        weighted_loss = get_class_weighted_cross_entropy(
            train["target"].values.ravel(), mu=0.1)

        # Training the model
        tabular_model.fit(train=train,
                          validation=val,
                          max_epochs=100,
                          loss=weighted_loss)
        # Keep only the predicted label for the fold's held-out rows.
        pred_df = tabular_model.predict(test).loc[:, ["prediction"]]
        res_pred.append(pred_df)

    # Held-out predictions of all folds, restored to the original row order.
    pred_tot = pd.concat(res_pred).sort_index()

    print_metrics(data['target'], pred_tot["prediction"], tag="Holdout")

    # The matrix was previously computed and thrown away; print it so the
    # result is actually reported.
    print(confusion_matrix(data['target'], pred_tot["prediction"]))

    # Saving the model (the one trained in the last fold).
    tabular_model.save_model("Analysis/basic")
def test_pretrained_backbone(
    regression_data,
    model_config_class,
    continuous_cols,
    categorical_cols,
    custom_metrics,
    custom_loss,
    custom_optimizer,
    tmpdir,
):
    """Check that a saved SSL model's backbone can be reused for regression.

    First a model with task ``"ssl"`` (``ssl_task="Denoising"``,
    ``aug_task="cutmix"``) is fitted and evaluated; calling ``predict`` on it
    is expected to raise ``AssertionError``. The model is saved under
    ``tmpdir`` and loaded back, and the loaded model's backbone is passed as
    ``trained_backbone`` when fitting a second model with task
    ``"regression"``. Both evaluation results must contain
    ``test_mean_squared_error``.
    """
    train, test, target = regression_data

    def fit_args():
        # Arguments shared by both fit calls; built anew on every call so
        # each fit receives its own empty ``optimizer_params`` dict.
        return {
            "train": train,
            "test": test,
            "metrics": custom_metrics,
            "loss": custom_loss,
            "optimizer": custom_optimizer,
            "optimizer_params": {},
        }

    data_config = DataConfig(
        target=target,
        continuous_cols=continuous_cols,
        categorical_cols=categorical_cols,
    )

    # The fixture yields a (config class, constructor kwargs) pair.
    # NOTE(review): the kwargs dict is modified in place below - confirm the
    # fixture does not share that dict between tests.
    config_cls, config_kwargs = model_config_class

    # ---- Stage 1: self-supervised model ----
    config_kwargs.update(task="ssl", ssl_task="Denoising", aug_task="cutmix")
    model_config = config_cls(**config_kwargs)
    trainer_config = TrainerConfig(
        max_epochs=3,
        checkpoints=None,
        early_stopping=None,
        gpus=None,
        fast_dev_run=True,
    )
    optimizer_config = OptimizerConfig()

    model = TabularModel(
        data_config=data_config,
        model_config=model_config,
        optimizer_config=optimizer_config,
        trainer_config=trainer_config,
    )
    model.fit(**fit_args())
    ssl_result = model.evaluate(test)
    # predict() is expected to fail for the SSL model.
    with pytest.raises(AssertionError):
        model.predict(test)
    assert "test_mean_squared_error" in ssl_result[0]

    # Round-trip the SSL model through a directory on disk.
    save_dir = str(tmpdir.mkdir("saved_model"))
    model.save_model(save_dir)
    pretrained = TabularModel.load_from_checkpoint(save_dir)

    # ---- Stage 2: regression model on top of the reloaded backbone ----
    config_kwargs.update(task="regression", ssl_task=None, aug_task=None)
    model_config = config_cls(**config_kwargs)
    trainer_config = TrainerConfig(
        max_epochs=1,
        checkpoints=None,
        early_stopping=None,
        gpus=None,
        fast_dev_run=True,
    )
    model = TabularModel(
        data_config=data_config,
        model_config=model_config,
        optimizer_config=optimizer_config,
        trainer_config=trainer_config,
    )
    model.fit(**fit_args(), trained_backbone=pretrained.model.backbone)
    regression_result = model.evaluate(test)
    assert "test_mean_squared_error" in regression_result[0]