Example #1
0
def test_id_wise_sampling_does_not_put_id_in_multiple_dataloaders():
    """ID-wise splitting must assign each trajectory ID to exactly one loader."""
    num_ids = 150
    sequences_per_id = 40

    # Every row is [x, global-sample-index, trajectory-ID]; the global index
    # uniquely identifies each sample across the whole frame.
    rows = [
        [step, trajectory * sequences_per_id + step, trajectory]
        for trajectory in range(num_ids)
        for step in range(sequences_per_id)
    ]
    df = pd.DataFrame(rows, columns=["x", "y", "ID"])

    # Hyperparameters
    batch_size = 10
    num_past = 10
    num_future = 5
    train_split_ratio = 0.333
    validation_split_ratio = 0.333

    dataloaders = dataset.MultiModalDataLoader(
        df,
        batch_size=batch_size,
        n_past=num_past,
        n_future=num_future,
        num_workers=1,
        train_split_ratio=train_split_ratio,
        validation_split_ratio=validation_split_ratio,
        scale=False,
    )

    # Delegates the disjointness checks to the shared verification helper.
    verify_sequential_id_sampled_sequential_dataloaders_equal_dataloaders(
        dataloaders, train_split_ratio, validation_split_ratio, num_ids)
Example #2
0
def test_category_wise_sampling_few_categories():
    """Category-wise sampling should work with only a handful of categories."""
    num_categories = 5

    # int(category / 14) is 0 for all five categories here, so each category
    # contributes exactly 40 rows of [x, y, ID].
    rows = []
    for category in range(num_categories):
        rows.extend([step, step, category]
                    for step in range(40 + int(category / 14)))

    df = pd.DataFrame(rows, columns=['x', 'y', 'ID'])

    # Hyperparameters
    batch_size = 1
    num_past = 10
    num_future = 5
    train_split_ratio = 0.5
    validation_split_ratio = 0.2

    dataloaders = dataset.MultiModalDataLoader(
        df,
        batch_size=batch_size,
        n_past=num_past,
        n_future=num_future,
        num_workers=1,
        train_split_ratio=train_split_ratio,
        validation_split_ratio=validation_split_ratio)

    # Shared helper asserts the category-wise split is well formed.
    verify_category_wise_sampled_dataloaders(dataloaders, train_split_ratio,
                                             validation_split_ratio,
                                             num_categories)
Example #3
0
def test_time_based_sampling_dataloaders_do_not_overlap():
    """With split_by_id=False each ID's sequence is split along time, so the
    train/test/validation loaders must draw from disjoint time segments.

    The x column is a segment marker: 1 for the training span, 2 for the test
    span, 3 for the validation span of every trajectory.  With the loader's
    default scaling these map to -1., 0. and 1. respectively (assumes the
    loader scales x into [-1, 1] by default — confirmed by the assertions
    below holding), and each loader must only ever see its own marker.
    """
    data = list()
    num_ids = 140
    sequence_length = 2000

    # Hyperparameters
    batch_size = 10
    num_past = 10
    num_future = 5
    train_split_ratio = 0.501
    validation_split_ratio = 0.25

    split_by_id = False  # The test condition

    for sample_id in range(num_ids):
        # Training span: marker 1.
        for element in range(round(sequence_length * train_split_ratio)):
            data.append([1, element, sample_id])
        # Test span (the remainder between train and validation): marker 2.
        for element in range(
                round(sequence_length *
                      (1 - train_split_ratio - validation_split_ratio))):
            data.append([2, element, sample_id])
        # Validation span: marker 3.
        for element in range(round(sequence_length * validation_split_ratio)):
            data.append([3, element, sample_id])

    df = pd.DataFrame(data, columns=["x", "y", "ID"])

    dataloaders = dataset.MultiModalDataLoader(
        df,
        batch_size=batch_size,
        n_past=num_past,
        n_future=num_future,
        num_workers=1,
        train_split_ratio=train_split_ratio,
        validation_split_ratio=validation_split_ratio,
        split_by_id=split_by_id,
    )

    # Loop variable renamed from the original `data` to stop it shadowing the
    # `data` list built above (idiom fix; behavior unchanged).
    for batch, target, ids, parameters, classes in dataloaders["train_loader"]:
        for sequence in batch:
            assert all(sample == -1.0 for sample in sequence[:, 0])
        for sequence in target:
            assert all(sample == -1.0 for sample in sequence[:, 0])

    for batch, target, ids, parameters, classes in dataloaders["test_loader"]:
        for sequence in batch:
            assert all(sample == 0 for sample in sequence[:, 0])
        for sequence in target:
            assert all(sample == 0 for sample in sequence[:, 0])

    for batch, target, ids, parameters, classes in dataloaders[
            "validation_loader"]:
        for sequence in batch:
            assert all(sample == 1 for sample in sequence[:, 0])
        for sequence in target:
            assert all(sample == 1 for sample in sequence[:, 0])
Example #4
0
def test_ae():
    """
    Test Autoencoder and variational auto encoder models for training/testing/generative network and
    classification networks

    """

    # Sample data
    df = jaguar()

    # Hyperparameters
    batch_size = 10
    num_past = 10
    num_future = 5

    # Prepare the dataloader
    data_loaders = dataset.MultiModalDataLoader(
        df,
        batch_size=batch_size,
        n_past=num_past,
        n_future=num_future,
        num_workers=1,
        train_split_ratio=0.5,
        validation_split_ratio=0.2,
    )

    model_save_path = './model.pt'

    # Autoencoder with an attached 9-class classifier head.
    model = MultiModelAE(
        input_size=2,
        num_past=num_past,
        batch_size=batch_size,
        num_future=num_future,
        lstm_hidden_size=32,
        num_lstm_layers=2,
        output_size=2,
        latent_size=10,
        batch_first=True,
        dropout=0.1,
        reset_state=True,
        bidirectional=False,
        num_classifier_layers=4,
        classifier_hidden_size=32,
        num_classes=9,
    )

    # Model Trainer; model types are "ae" or "vae".
    trainer = HybridTrainer(model=model, optimizer_type='Adam',
                            loss_type='huber')

    # Train the forecasting head first, then the classifier.
    for mode in ('forecasting', 'classification'):
        trainer.fit(data_loaders, model_save_path, epochs=10,
                    training_mode=mode)
Example #5
0
def test_lstm_jaguar():
    """
    Testing method for lstm model used for forecasting.
    """

    # Sample data
    df = jaguar()

    # Hyperparameters — timeseries prediction requires num_past == num_future.
    batch_size = 10
    num_past = num_future = 10

    # For timeseries prediction
    assert num_past == num_future

    # Prepare the dataloader
    data_loaders = dataset.MultiModalDataLoader(df,
                                                batch_size=batch_size,
                                                n_past=num_past,
                                                n_future=num_future,
                                                num_workers=1)

    model_save_path = "./model.pt"

    # Model init
    model = LSTM(input_size=2,
                 hidden_size=32,
                 num_layers=2,
                 output_size=2,
                 dropout=0.1,
                 batch_size=batch_size,
                 num_future=num_future,
                 bidirectional=False,
                 batch_first=True,
                 reset_state=True)

    # Model Trainer
    trainer = HybridTrainer(model=model, optimizer_type='Adam',
                            loss_type='huber')

    # Forecasting loss on the training split before any training.
    loss_before, _, _ = trainer.validate(data_loaders['train_loader'])
    print(f'Loss pre training: {loss_before}')

    # Train the model
    trainer.fit(data_loaders, model_save_path, epochs=2,
                training_mode="forecasting", validate_every=1, test_every=2)

    loss_after, _, _ = trainer.validate(data_loaders['train_loader'])
    print(f'Loss post training: {loss_after}')

    # Training must reduce the forecasting loss.
    assert loss_after < loss_before
Example #6
0
def test_sequential_data_loader_indices_are_sequential():
    """The sequential train/test loaders must yield sample IDs in
    monotonically non-decreasing order.

    Builds 46 trajectories of ~40 points each, constructs the dataloaders,
    and walks both sequential loaders asserting IDs never decrease.
    """

    def _assert_ids_monotonic(loader, loader_name):
        # Track the largest ID seen so far; any ID below it breaks
        # monotonicity.  (Original loop variable `id` shadowed the builtin.)
        highest_seen = 0
        for _data, _target, ids, _parameters, _classes in loader:
            for sample_id in ids:
                sample_id = int(sample_id)
                if sample_id > highest_seen:
                    highest_seen = sample_id
                assert sample_id == highest_seen, (
                    "IDs in " + loader_name +
                    " should increase monotonically!")

    rows = []
    num_ids = 46

    # Trajectories grow slightly longer every 14 IDs.
    for trajectory_id in range(num_ids):
        for step in range(40 + int(trajectory_id / 14)):
            rows.append([step, step, trajectory_id])

    df = pd.DataFrame(rows, columns=["x", "y", "ID"])

    # Hyperparameters
    batch_size = 18
    num_past = 13
    num_future = 8
    train_split_ratio = 0.5
    validation_split_ratio = 0.2
    stride = 1

    dataloaders = dataset.MultiModalDataLoader(
        df,
        batch_size=batch_size,
        n_past=num_past,
        n_future=num_future,
        num_workers=1,
        train_split_ratio=train_split_ratio,
        validation_split_ratio=validation_split_ratio,
        stride=stride,
    )

    _assert_ids_monotonic(dataloaders["sequential_train_loader"],
                          "sequential train loader")
    _assert_ids_monotonic(dataloaders["sequential_test_loader"],
                          "sequential test loader")
Example #7
0
def test_ae_jaguar():
    """
    Test autoencoder forecasting with the Jaguar dataset
    """

    # Sample data
    df = jaguar()

    # Hyperparameters
    batch_size = 10
    num_past = 10
    num_future = 5

    # Prepare the dataloader
    data_loaders = dataset.MultiModalDataLoader(df,
                                                batch_size=batch_size,
                                                n_past=num_past,
                                                n_future=num_future,
                                                num_workers=1,
                                                train_split_ratio=0.5,
                                                validation_split_ratio=0.2)

    model_save_path = "./model.pt"

    # Plain (non-variational) autoencoder without a classifier head.
    model = MultiModelAE(input_size=2,
                         num_past=num_past,
                         batch_size=batch_size,
                         num_future=num_future,
                         lstm_hidden_size=32,
                         num_lstm_layers=2,
                         output_size=2,
                         latent_size=10,
                         batch_first=True,
                         dropout=0.1,
                         reset_state=True,
                         bidirectional=False)

    # Model Trainer; model types are "ae" or "vae".
    trainer = HybridTrainer(model=model, optimizer_type="Adam",
                            loss_type="huber")

    # Train three times, exercising the optional validate_every / test_every
    # arguments including their None (disabled) forms.
    for validate_every, test_every in ((2, 5), (None, 5), (2, None)):
        trainer.fit(data_loaders, model_save_path, epochs=5,
                    training_mode="forecasting",
                    validate_every=validate_every, test_every=test_every)

    trainer.validate(data_loaders["sequential_validation_loader"])
Example #8
0
def test_plot_prediction():
    """Smoke-test plot_prediction with a freshly initialised (untrained) VAE."""
    # Hyperparameters
    batch_size = 10
    num_past = 10
    num_future = 10

    input_size = 2
    lstm_hidden_size = 512
    lstm_num_layers = 4
    batch_first = True
    reset_state = True
    output_size = 2
    num_classes = 9
    latent_size = 20
    dropout = 0.1
    bidirectional = False

    # Prepare the dataloader
    df = jaguar()
    data_loaders = dataset.MultiModalDataLoader(df,
                                                batch_size=batch_size,
                                                n_past=num_past,
                                                n_future=num_future,
                                                num_workers=1)

    model = MultiModelVAE(input_size=input_size,
                          output_size=output_size,
                          lstm_hidden_size=lstm_hidden_size,
                          num_lstm_layers=lstm_num_layers,
                          num_classes=num_classes,
                          latent_size=latent_size,
                          dropout=dropout,
                          num_classifier_layers=4,
                          classifier_hidden_size=32,
                          batch_size=batch_size,
                          num_future=num_future,
                          num_past=num_past,
                          bidirectional=bidirectional,
                          batch_first=batch_first,
                          reset_state=reset_state)

    # NOTE(review): the trainer is never used below; construction is kept as
    # a smoke test.  The unused `model_save_path` local was removed.
    trainer = HybridTrainer(model=model,
                            optimizer_type='Adam',
                            loss_type='huber')

    # Plot predictions for batch index 1 of the sequential test loader.
    plot_prediction(model, data_loaders['sequential_test_loader'], 1)
Example #9
0
def test_time_based_weighted_sampling_dataloaders_do_not_overlap():
    """Weighted time-based splitting must keep loader sample indices disjoint."""
    num_ids = 232
    rows = []
    next_sample_id = 0

    # Variable-length trajectories (40..156 points, pseudo-randomly sized) so
    # the weighted sampler has uneven IDs to balance.
    for trajectory in range(num_ids):
        for step in range(40 + (int(trajectory * 2.234) % 117)):
            rows.append([step, next_sample_id, trajectory])
            next_sample_id += 1

    df = pd.DataFrame(rows, columns=["x", "y", "ID"])

    # Hyperparameters
    batch_size = 10
    num_past = 10
    num_future = 5
    train_split_ratio = 0.333
    validation_split_ratio = 0.333

    dataloaders = dataset.MultiModalDataLoader(
        df,
        batch_size=batch_size,
        n_past=num_past,
        n_future=num_future,
        num_workers=1,
        train_split_ratio=train_split_ratio,
        validation_split_ratio=validation_split_ratio,
        scale=False,
        split_by_id=False,
        weighted_sampling=True,
        stride=1,
    )

    loader_names = ("train_loader", "test_loader", "validation_loader")
    plain_ids = [extract_sample_ids_from_dataloader(dataloaders[name])
                 for name in loader_names]
    sequential_ids = [
        extract_sample_ids_from_dataloader(dataloaders["sequential_" + name])
        for name in loader_names
    ]

    # No sample index may appear in more than one loader of either family.
    verify_that_indices_belong_to_precisely_one_loader(*plain_ids)
    verify_that_indices_belong_to_precisely_one_loader(*sequential_ids)
Example #10
0
def test_lstm():
    """
    Testing method for lstm model used for forecasting.
    """

    # Sample data
    df = jaguar()

    # Hyperparameters — timeseries prediction requires num_past == num_future.
    batch_size = 10
    num_past = num_future = 10

    # For timeseries prediction
    assert num_past == num_future

    # Prepare the dataloader
    data_loaders = dataset.MultiModalDataLoader(
        df, batch_size=batch_size, n_past=num_past, n_future=num_future,
        num_workers=1)

    model_save_path = './model.pt'

    # Model init
    model = LSTM(
        input_size=2,
        hidden_size=32,
        num_layers=2,
        output_size=2,
        dropout=0.1,
        batch_size=batch_size,
        num_future=num_future,
        bidirectional=False,
        batch_first=True,
        reset_state=True,
    )

    # Model Trainer
    trainer = HybridTrainer(model=model, optimizer_type='Adam',
                            loss_type='huber')

    # Train the model
    trainer.fit(data_loaders, model_save_path, epochs=10,
                training_mode='forecasting')
Example #11
0
def test_id_wise_sampling_with_short_sequences_does_not_divide_by_zero():
    """ID-wise splitting must tolerate trajectories too short to produce a
    single window, without dividing by zero."""
    num_ids = 283
    rows = []
    next_sample_id = 0

    # Trajectory lengths cycle from 1 to 74; the shortest ones generate zero
    # time-series windows.
    for trajectory in range(num_ids):
        for step in range(1 + (trajectory % 74)):
            rows.append([step, next_sample_id, trajectory])
            next_sample_id += 1

    df = pd.DataFrame(rows, columns=["x", "y", "ID"])

    # Hyperparameters
    batch_size = 1
    num_past = 10
    num_future = 5
    train_split_ratio = 0.333
    validation_split_ratio = 0.333

    dataloaders = dataset.MultiModalDataLoader(
        df,
        batch_size=batch_size,
        n_past=num_past,
        n_future=num_future,
        num_workers=1,
        train_split_ratio=train_split_ratio,
        validation_split_ratio=validation_split_ratio,
        scale=False,
    )

    # Some IDs are too short to survive windowing, hence expect_all_ids=False.
    verify_sequential_id_sampled_sequential_dataloaders_equal_dataloaders(
        dataloaders,
        train_split_ratio,
        validation_split_ratio,
        num_ids,
        expect_all_ids=False,
    )
Example #12
0
def test_vae_classification_network_converges():
    """
    Test that Autoencoder and variational auto encoder models for classification networks converge
    """

    num_ids = 8
    rows = []

    # Two synthetic classes: class 1 traces a sine/cosine pair while class 0
    # follows a descending line, giving the classifier an easy boundary.
    for trajectory in range(num_ids):
        label = trajectory % 2
        for step in range(70 + trajectory * 4):
            xx = label * np.sin(step / 20.0) + (label - 1) * step
            yy = label * np.cos(step / 20.0) + (label - 1) * step
            rows.append([xx, yy, trajectory, label])

    # Sample data
    df = pd.DataFrame(rows, columns=['x', 'y', 'ID', 'class'])

    # Hyperparameters
    batch_size = 2
    num_past = 10
    num_future = 5

    # Prepare the dataloader
    data_loaders = dataset.MultiModalDataLoader(df,
                                                batch_size=batch_size,
                                                n_past=num_past,
                                                n_future=num_future,
                                                train_split_ratio=0.333,
                                                validation_split_ratio=0.333,
                                                num_workers=1,
                                                split_by_id=False,
                                                stride=1)

    model_save_path = './model.pt'

    model = MultiModelVAE(input_size=2,
                          output_size=2,
                          lstm_hidden_size=32,
                          num_lstm_layers=2,
                          num_classes=2,
                          latent_size=10,
                          dropout=0.1,
                          num_classifier_layers=4,
                          classifier_hidden_size=32,
                          batch_size=batch_size,
                          num_future=num_future,
                          num_past=num_past,
                          bidirectional=False,
                          batch_first=True,
                          reset_state=True)

    # Test resetting the classifier, to make sure this function works
    model.reset_classifier(classifier_hidden_size=32, num_classifier_layers=4)

    # Model Trainer; model types are "ae" or "vae".
    trainer = HybridTrainer(model=model, optimizer_type='Adam',
                            loss_type='mse')

    _, _, classification_loss_before = trainer.validate(
        data_loaders['train_loader'])
    print(f'Loss pre training: {classification_loss_before}')

    # Train the forecasting head first, then the classifier.
    for mode in ("forecasting", "classification"):
        trainer.fit(data_loaders, model_save_path, epochs=2,
                    training_mode=mode, validate_every=1, test_every=2)

    _, _, classification_loss_after = trainer.validate(
        data_loaders['train_loader'])
    print(f'Loss post training: {classification_loss_after}')

    # Classification loss must drop after training.
    assert classification_loss_after < classification_loss_before
Example #13
0
def test_vae_regression_network_converges():
    """
    Test that Autoencoder and variational auto encoder models for regression networks converge
    """

    num_ids = 3
    rows = []

    # Each trajectory carries two regression targets that are linear in its
    # ID, so the regressor has a simple mapping to learn.
    for trajectory in range(num_ids):
        parameter_one = 0.2 * trajectory
        parameter_two = 91.235 * trajectory
        for step in range(70 + trajectory * 4):
            rows.append([step, step, trajectory, parameter_one, parameter_two])

    # Sample data
    df = pd.DataFrame(rows,
                      columns=['x', 'y', 'ID', 'parameter_one', 'parameter_two'])

    parameter_columns = ['parameter_one', 'parameter_two']

    # Hyperparameters
    batch_size = 1
    num_past = 10
    num_future = 5

    # Prepare the dataloader
    data_loaders = dataset.MultiModalDataLoader(df,
                                                batch_size=batch_size,
                                                n_past=num_past,
                                                n_future=num_future,
                                                train_split_ratio=0.333,
                                                validation_split_ratio=0.333,
                                                num_workers=1,
                                                parameter_columns=parameter_columns,
                                                split_by_id=False,
                                                stride=1)

    model_save_path = './model.pt'

    model = MultiModelVAE(input_size=2,
                          output_size=2,
                          lstm_hidden_size=32,
                          num_lstm_layers=2,
                          num_regressor_parameters=len(parameter_columns),
                          latent_size=10,
                          dropout=0.1,
                          num_regressor_layers=4,
                          regressor_hidden_size=32,
                          batch_size=batch_size,
                          num_future=num_future,
                          num_past=num_past,
                          bidirectional=False,
                          batch_first=True,
                          reset_state=True)

    # Test resetting the regressor, to make sure this function works
    model.reset_regressor(regressor_hidden_size=32, num_regressor_layers=4)

    # Model Trainer; model types are "ae" or "vae".
    trainer = HybridTrainer(model=model, optimizer_type="Adam",
                            loss_type="mse")

    _, regression_loss_before, _ = trainer.validate(
        data_loaders['train_loader'])
    print(f'Loss pre training: {regression_loss_before}')

    # Train the forecasting head first, then the regressor.
    for mode in ("forecasting", "regression"):
        trainer.fit(data_loaders, model_save_path, epochs=2,
                    training_mode=mode, validate_every=1, test_every=2)

    _, regression_loss_after, _ = trainer.validate(
        data_loaders['train_loader'])
    print(f'Loss post training: {regression_loss_after}')

    # Regression loss must drop after training.
    assert regression_loss_after < regression_loss_before
Example #14
0
def test_aevae_jaguar():
    """
    Test variational autoencoder forecasting with the Jaguar dataset
    """

    # Sample data
    df = jaguar()

    # Hyperparameters
    batch_size = 10
    num_past = 10
    num_future = 5

    # Prepare the dataloader; split along time rather than by ID.
    data_loaders = dataset.MultiModalDataLoader(df,
                                                batch_size=batch_size,
                                                n_past=num_past,
                                                n_future=num_future,
                                                train_split_ratio=0.5,
                                                num_workers=1,
                                                split_by_id=False)

    model_save_path = "./model.pt"

    # VAE without classifier/regressor heads: forecasting only.
    model = MultiModelVAE(input_size=2,
                          output_size=2,
                          lstm_hidden_size=32,
                          num_lstm_layers=2,
                          latent_size=10,
                          dropout=0.1,
                          batch_size=batch_size,
                          num_future=num_future,
                          num_past=num_past,
                          bidirectional=False,
                          batch_first=True,
                          reset_state=True)

    # Test that we can run functions on our network.
    model.disable_latent_output()
    model.enable_latent_output()

    # Model Trainer; model types are "ae" or "vae".
    trainer = HybridTrainer(model=model, optimizer_type="Adam",
                            loss_type="huber")

    # Train the model
    trainer.fit(data_loaders, model_save_path, epochs=10,
                training_mode="forecasting", validate_every=5, test_every=10)

    scaler = data_loaders["train_loader"].dataset.scaler

    # Load the trained model given the path
    model_path = "./model.pt"
    hyperparams = "./hypers.json"
    model_hyperparameters = traja.models.read_hyperparameters(hyperparams)

    # For prebuild traja generative models
    generator = traja.models.inference.Generator(
        model_type="vae",
        model_hyperparameters=model_hyperparameters,
        model_path=model_path,
        model=None)
    out = generator.generate(num_future, classify=False, scaler=scaler,
                             plot_data=False)

    trainer.validate(data_loaders["validation_loader"])