Exemplo n.º 1
0
def test_init_train(tmpdir):
    """Smoke-test that a directly constructed classifier can be fitted.

    Wraps ``DummyDataset`` in a ``DataLoader`` with batches of 16 and runs
    a ``fast_dev_run`` fit with ``tmpdir`` as the trainer's root directory.
    """
    loader = torch.utils.data.DataLoader(DummyDataset(), batch_size=16)
    classifier = TabularClassifier(
        num_classes=10,
        num_features=16 + 16,
        embedding_sizes=16 * [(10, 32)],
    )
    Trainer(default_root_dir=tmpdir, fast_dev_run=True).fit(classifier, loader)
Exemplo n.º 2
0
def test_jit(tmpdir):
    """Check that a traced classifier still yields a ``(1, 10)`` tensor."""

    def _sample_input():
        # One row: four integer values in [0, 10) and four random floats.
        return (torch.randint(0, 10, size=(1, 4)), torch.rand(1, 4))

    classifier = TabularClassifier(
        num_classes=10,
        num_features=8,
        embedding_sizes=4 * [(10, 32)],
    )
    classifier.eval()

    # torch.jit.script doesn't work with tabnet
    traced = torch.jit.trace(classifier, (_sample_input(), ))

    # TODO: torch.jit.save doesn't work with tabnet
    # path = os.path.join(tmpdir, "test.pt")
    # torch.jit.save(model, path)
    # model = torch.jit.load(path)

    # A fresh random sample is fed to the traced module.
    result = traced(_sample_input())
    assert isinstance(result, torch.Tensor)
    assert result.shape == torch.Size([1, 10])
def test_classification(backbone, fields, tmpdir):
    """Fit a classifier built from a data module for the given backbone.

    ``fields`` is unpacked as keyword arguments into
    ``TabularClassificationData.from_data_frame``; train, validation and
    test splits are each an independent copy of ``TEST_DF_1``.
    """
    train_frame, val_frame, test_frame = (TEST_DF_1.copy() for _ in range(3))
    datamodule = TabularClassificationData.from_data_frame(
        **fields,
        target_fields="label",
        train_data_frame=train_frame,
        val_data_frame=val_frame,
        test_data_frame=test_frame,
        num_workers=0,
        batch_size=2,
    )
    classifier = TabularClassifier.from_data(
        datamodule=datamodule,
        backbone=backbone,
    )
    pl.Trainer(fast_dev_run=True, default_root_dir=tmpdir).fit(
        classifier, datamodule)
Exemplo n.º 4
0
def test_serve():
    """Build a classifier from a two-row frame and call ``serve()`` on it."""
    frame = pd.DataFrame.from_dict({
        "num_col": [1.4, 2.5],
        "cat_col": ["positive", "negative"],
        "target": [1, 2],
    })
    datamodule = TabularData.from_data_frame(
        "cat_col",
        "num_col",
        "target",
        frame,
    )
    classifier = TabularClassifier.from_data(datamodule)
    # TODO: Currently only servable once a preprocess has been attached
    classifier._preprocess = datamodule.preprocess
    classifier.eval()
    classifier.serve()
Exemplo n.º 5
0
def test_classification(tmpdir):
    """Fit a directly constructed classifier on ``TabularData.from_df``.

    All three splits are independent copies of ``TEST_DF_1``; embedding
    sizes are taken from the data module's ``emb_sizes``.
    """
    train_frame, val_frame, test_frame = (TEST_DF_1.copy() for _ in range(3))
    datamodule = TabularData.from_df(
        train_frame,
        categorical_cols=["category"],
        numerical_cols=["scalar_a", "scalar_b"],
        target_col="label",
        val_df=val_frame,
        test_df=test_frame,
        num_workers=0,
        batch_size=2,
    )
    classifier = TabularClassifier(
        num_features=3,
        num_classes=2,
        embedding_sizes=datamodule.emb_sizes,
    )
    pl.Trainer(fast_dev_run=True, default_root_dir=tmpdir).fit(
        classifier, datamodule)
# Step 1: fetch the Titanic archive into ``data/``.
download_data("https://pl-flash-data.s3.amazonaws.com/titanic.zip", "data/")

# Step 2: build the data module from the CSV files, holding out 25% of the
# training file for validation.
datamodule = TabularData.from_csv(
    ["Sex", "Age", "SibSp", "Parch", "Ticket", "Cabin", "Embarked"],
    ["Fare"],
    target_field="Survived",
    train_file="./data/titanic/titanic.csv",
    test_file="./data/titanic/test.csv",
    val_split=0.25,
)

# Step 3: create the classifier and attach the metrics to report.
metrics = [Accuracy(), Precision(), Recall()]
model = TabularClassifier.from_data(datamodule, metrics=metrics)

# Step 4: create the trainer (``fast_dev_run`` keeps the run minimal).
trainer = flash.Trainer(fast_dev_run=True)

# Step 5: train.
trainer.fit(model, datamodule=datamodule)

# Step 6: evaluate on the test split.
trainer.test(model)

# Step 7: persist the trained model.
trainer.save_checkpoint("tabular_classification_model.pt")
Exemplo n.º 7
0
def test_module_import_error(tmpdir):
    """Constructing the classifier must raise a ``[tabular]`` import error.

    Expects ``ModuleNotFoundError`` whose message contains the literal text
    ``[tabular]`` (presumably the name of the missing install extra).
    """
    # ``match`` is treated as a regular expression searched in the message.
    # An unescaped "[tabular]" is a character class that matches any single
    # one of those letters, so the brackets are escaped to require the
    # literal text.
    with pytest.raises(ModuleNotFoundError, match=r"\[tabular\]"):
        TabularClassifier(num_classes=10, num_features=16, embedding_sizes=[])
from flash.core.data import download_data
from flash.tabular import TabularClassifier

if __name__ == "__main__":

    # Step 1: fetch the Titanic archive into ``data/``.
    download_data("https://pl-flash-data.s3.amazonaws.com/titanic.zip", "data/")

    # Step 2: restore the classifier from the published checkpoint.
    checkpoint_url = (
        "https://flash-weights.s3.amazonaws.com/tabular_classification_model.pt"
    )
    model = TabularClassifier.load_from_checkpoint(checkpoint_url)

    # Step 3: predict straight from the CSV file and show the result.
    predictions = model.predict("data/titanic/titanic.csv")
    print(predictions)
Exemplo n.º 9
0
def test_load_from_checkpoint_dependency_error():
    """Loading a checkpoint must raise the ``lightning-flash[tabular]`` error."""
    # Escaped so the brackets and quotes are matched literally.
    expected_message = re.escape("'lightning-flash[tabular]'")
    with pytest.raises(ModuleNotFoundError, match=expected_message):
        TabularClassifier.load_from_checkpoint("not_a_real_checkpoint.pt")
Exemplo n.º 10
0
import torch

import flash
from flash.core.data.utils import download_data
from flash.tabular import TabularClassificationData, TabularClassifier

# Step 1: download the Titanic archive and build the data module, holding
# out 10% of the training file for validation.
download_data("https://pl-flash-data.s3.amazonaws.com/titanic.zip", "./data")

titanic_csv = "data/titanic/titanic.csv"
datamodule = TabularClassificationData.from_csv(
    ["Sex", "Age", "SibSp", "Parch", "Ticket", "Cabin", "Embarked"],
    "Fare",
    target_fields="Survived",
    train_file=titanic_csv,
    val_split=0.1,
)

# Step 2: build the task from the data module.
model = TabularClassifier.from_data(datamodule)

# Step 3: train for three epochs, passing the number of CUDA devices that
# torch reports as ``gpus``.
gpu_count = torch.cuda.device_count()
trainer = flash.Trainer(max_epochs=3, gpus=gpu_count)
trainer.fit(model, datamodule=datamodule)

# Step 4: predict directly from the CSV file and show the result.
predictions = model.predict(titanic_csv)
print(predictions)

# Step 5: persist the trained model.
trainer.save_checkpoint("tabular_classification_model.pt")
# Step 1: download the Titanic archive and build the training data module,
# holding out 10% of the training file for validation.
download_data("https://pl-flash-data.s3.amazonaws.com/titanic.zip", "./data")

titanic_csv = "data/titanic/titanic.csv"
datamodule = TabularClassificationData.from_csv(
    categorical_fields=[
        "Sex",
        "Age",
        "SibSp",
        "Parch",
        "Ticket",
        "Cabin",
        "Embarked",
    ],
    numerical_fields="Fare",
    target_fields="Survived",
    train_file=titanic_csv,
    val_split=0.1,
    batch_size=8,
)

# Step 2: build the task with the "fttransformer" backbone.
model = TabularClassifier.from_data(datamodule, backbone="fttransformer")

# Step 3: train for three epochs, passing the number of CUDA devices that
# torch reports as ``gpus``.
gpu_count = torch.cuda.device_count()
trainer = flash.Trainer(max_epochs=3, gpus=gpu_count)
trainer.fit(model, datamodule=datamodule)

# Step 4: rebuild the data module for prediction, reusing the ``parameters``
# of the training data module, then predict class outputs.
datamodule = TabularClassificationData.from_csv(
    predict_file=titanic_csv,
    parameters=datamodule.parameters,
    batch_size=8,
)
predictions = trainer.predict(model, datamodule=datamodule, output="classes")
print(predictions)

# 5. Save the model!