Exemplo n.º 1
0
def test_arima():
    """Run the ARIMA evaluation end to end on ``dfw_demand.csv.gz``.

    Any existing ``results`` directory is deleted first, then a dataset
    sampled at ``"30min"`` is built and passed to ``evaluate_ARIMA``
    together with a configuration whose ``arima_order`` is ``(1, 0, 0)``.
    """
    results_root = "results"
    # Start from a clean slate so output of earlier runs cannot linger.
    if os.path.exists(results_root):
        shutil.rmtree(results_root)

    arima_params = {'arima_order': (1, 0, 0)}
    config = {
        'directory': 'results/arima',
        'freq': "30min",
        'params': arima_params,
    }

    dataset_options = dict(
        index="timestamp",
        freq="30min",
        max_days=500,
        max_training_days=200,
        # NOTE: "predition_length" is the keyword spelling used at every
        # athena.Dataset call site in this file; do not "correct" it here.
        predition_length=12,
        test_start_values=["2019-07-27 13:00:00"],
        test_sequence_length=4,
    )
    dataset = athena.Dataset("dfw_demand.csv.gz", **dataset_options)

    transform = DataTransformARIMA(['vehicles'])

    evaluate_ARIMA(dataset, transform, config)
Exemplo n.º 2
0
def get_dataset(config):
    """Build an ``athena.Dataset`` from a configuration mapping.

    Args:
        config: Mapping that provides the entries ``filename``, ``index``,
            ``freq``, ``max_days``, ``max_training_days``,
            ``prediction_length``, ``test_start_values`` and
            ``test_sequence_length``.

    Returns:
        The object returned by ``athena.Dataset`` for those settings.
    """
    make_dataset = athena.Dataset
    source_file = config['filename']
    options = {
        'index': config['index'],
        'freq': config['freq'],
        'max_days': config['max_days'],
        'max_training_days': config['max_training_days'],
        # The config entry is spelled "prediction_length", whereas the
        # keyword passed to athena.Dataset is spelled "predition_length".
        'predition_length': config['prediction_length'],
        'test_start_values': config['test_start_values'],
        'test_sequence_length': config['test_sequence_length'],
    }
    return make_dataset(source_file, **options)
Exemplo n.º 3
0
def evaluate_model(model, config):
    """Evaluate ``model`` via ``evaluate_gluonts`` and check the CSV output.

    Deletes any existing ``results`` directory, builds a dataset from
    ``dfw_demand.csv.gz``, runs ``athena.utils.gluonts.evaluate_gluonts`` and
    then verifies that a CSV file found in ``config['directory']`` holds
    48 rows.

    Args:
        model: Model object forwarded unchanged to ``evaluate_gluonts``.
        config: Mapping forwarded to ``evaluate_gluonts``; its ``directory``
            entry names the folder searched for the resulting CSV file.

    Raises:
        AssertionError: If no CSV file is found in ``config['directory']``
            or the CSV that is read does not contain 48 rows.
    """
    # Start from a clean slate so stale CSVs from earlier runs are not read.
    if os.path.exists("results"):
        shutil.rmtree("results")

    dataset = athena.Dataset("dfw_demand.csv.gz",
                             index="timestamp",
                             freq="30min",
                             max_training_days=5,
                             predition_length=48,
                             test_start_values=["2019-07-27 00:00:00"],
                             test_sequence_length=1)

    transform = athena.utils.gluonts.DataTransformGluon(['vehicles'])

    athena.utils.gluonts.evaluate_gluonts(dataset, transform, model, config)

    # glob.glob returns an empty list when nothing matches; fail with a
    # readable message instead of an opaque IndexError on the [0] lookup.
    csv_files = glob.glob("{}/*.csv".format(config['directory']))
    assert csv_files, \
        "no CSV results found in {}".format(config['directory'])

    df = pd.read_csv(csv_files[0])
    # Presumably predition_length (48) rows for the single test window;
    # TODO confirm against the evaluate_gluonts output format.
    assert len(df) == 48
Exemplo n.º 4
0
def test_ds():
    """Check the cv count and the size limits of an ``athena.Dataset``."""
    max_days = 500
    max_training_days = 200
    start_values = ["2019-07-27 00:00:00"]
    sequence_length = 4

    ds = athena.Dataset(
        "dfw_demand.csv.gz",
        index="timestamp",
        freq="30min",
        max_days=max_days,
        max_training_days=max_training_days,
        predition_length=1,
        test_start_values=start_values,
        test_sequence_length=sequence_length,
    )

    # One cv entry is expected per start value and sequence step (1 * 4).
    expected_cv_count = len(start_values) * sequence_length
    assert len(ds.cv) == expected_cv_count

    # The loaded frame must span exactly max_days worth of rows.
    assert len(ds.df) == max_days * ds.rows_per_day

    # No cv entry may train on more than max_training_days days.
    for split in ds.cv:
        training_rows = split['train_stop'] - split['train_start']
        assert training_rows / ds.rows_per_day <= max_training_days
Exemplo n.º 5
0
def evaluate_model(model, config):
    """Evaluate ``model`` via ``evaluate_sklearn`` and check the CSV output.

    Deletes any existing ``results`` directory, builds a dataset from
    ``dfw_demand.csv.gz``, runs ``athena.utils.sklearn.evaluate_sklearn`` and
    then verifies that a CSV file found in ``config['directory']`` holds
    4 rows.

    Args:
        model: Model object forwarded unchanged to ``evaluate_sklearn``.
        config: Mapping forwarded to ``evaluate_sklearn``; its ``directory``
            entry names the folder searched for the resulting CSV file.

    Raises:
        AssertionError: If no CSV file is found in ``config['directory']``
            or the CSV that is read does not contain 4 rows.
    """
    # Start from a clean slate so stale CSVs from earlier runs are not read.
    if os.path.exists("results"):
        shutil.rmtree("results")

    dataset = athena.Dataset("dfw_demand.csv.gz",
                             index="timestamp",
                             freq="30min",
                             max_days=500,
                             max_training_days=200,
                             predition_length=1,
                             test_start_values=["2019-07-27 13:00:00"],
                             test_sequence_length=4)

    # NOTE(review): the three lists are presumably the target column, lagged
    # feature columns and calendar features; confirm against
    # DataTransformSKLearn.
    transform = DataTransformSKLearn(
        ['vehicles'], ['vehicles_1', 'vehicles_2', 'vehicles_3'], ['hour'])

    athena.utils.sklearn.evaluate_sklearn(dataset, transform, model, config)

    # glob.glob returns an empty list when nothing matches; fail with a
    # readable message instead of an opaque IndexError on the [0] lookup.
    csv_files = glob.glob("{}/*.csv".format(config['directory']))
    assert csv_files, \
        "no CSV results found in {}".format(config['directory'])

    df = pd.read_csv(csv_files[0])

    assert len(df) == 4  # test_sequence_length * len(test_start_values)