示例#1
0
def test_get_df():
    """Tests ``DataLoader.get_df`` on a daily dataset, an hourly dataset and a missing file.

    Checks the column names and shape of each loaded dataframe, then checks
    that a ``ValueError`` with the expected message is raised when the
    requested data name has no matching file in the data directory.
    """
    import re  # local import: only needed to escape the expected error message

    dl = DataLoader()
    # Daily data
    data_path = dl.get_data_home(data_dir=None, data_sub_dir="daily")
    df = dl.get_df(data_path=data_path, data_name="daily_peyton_manning")
    assert list(df.columns) == ["ts", "y"]
    assert df.shape == (2905, 2)

    # Hourly data
    data_path = dl.get_data_home(data_dir=None, data_sub_dir="hourly")
    df = dl.get_df(data_path=data_path, data_name="hourly_parking")
    assert list(df.columns) == [
        "SystemCodeNumber", "Capacity", "Occupancy", "LastUpdated"
    ]
    assert df.shape == (35717, 4)

    # Error due to wrong file name
    data_path = dl.get_data_home(data_dir=None, data_sub_dir="daily")
    file_path = os.path.join(data_path, "parking.csv")
    file_names = dl.get_data_names(data_path=data_path)
    # `match` is treated as a regular expression, so the literal message is
    # escaped as a whole. Interpolating raw paths into the pattern breaks on
    # regex metacharacters (e.g. backslashes in Windows paths, "+", "(").
    expected_message = (
        f"Given file path '{file_path}' is not found. Available datasets "
        f"in data directory '{data_path}' are {file_names}.")
    with pytest.raises(ValueError, match=re.escape(expected_message)):
        dl.get_df(data_path=data_path, data_name="parking")
示例#2
0
def test_get_data_names():
    """Tests ``DataLoader.get_data_names`` on the default data folder and its "daily" subfolder."""
    loader = DataLoader()

    # The top-level 'data' folder holds no .csv file, so an empty list is expected
    top_level_names = loader.get_data_names(data_path=loader.get_data_home())
    assert top_level_names == []

    # The "daily" subfolder: order is not checked, only the set of names
    daily_path = loader.get_data_home(data_sub_dir="daily")
    daily_names = loader.get_data_names(data_path=daily_path)
    expected_daily_names = {
        "daily_temperature_australia",
        "daily_demand_order",
        "daily_female_births",
        "daily_istanbul_stock",
        "daily_peyton_manning",
    }
    assert set(daily_names) == expected_daily_names
示例#3
0
def test_get_data_home():
    """Tests ``DataLoader.get_data_home`` with defaults, a subdirectory and a missing directory."""
    loader = DataLoader()

    # Default parameters: the returned path ends with the "data" folder
    default_home = os.path.normpath(loader.get_data_home())
    assert os.path.basename(default_home) == "data"

    # With subdirectory: the returned path ends with that subdirectory
    daily_home = os.path.normpath(loader.get_data_home(data_sub_dir="daily"))
    assert os.path.basename(daily_home) == "daily"

    # Error due to non existing folder
    missing_dir = "/home/data"
    expected_error = f"Requested data directory '{missing_dir}' does not exist."
    with pytest.raises(ValueError, match=expected_error):
        loader.get_data_home(data_dir=missing_dir)
def test_benchmark_silverkite_template_with_real_data():
    """Tests ``benchmark_silverkite_template`` on the "daily_female_births" dataset.

    Each grid option holds a single item so that only one configuration is
    run; the first returned result is checked against that configuration.
    """
    # single-item lists keep the test fast
    horizon_options = [30]
    cv_options = [3]
    algorithm_options = ["linear"]
    selection_metric = EvaluationMetricEnum.MeanSquaredError
    evaluation_metric = EvaluationMetricParam(
        cv_selection_metric=selection_metric.name)

    # real data
    loader = DataLoader()
    data_name = "daily_female_births"
    births_df = loader.get_df(
        data_path=loader.get_data_home(data_sub_dir="daily"),
        data_name=data_name)
    metadata = MetadataParam(time_col="Date", value_col="Births", freq="D")

    benchmark_results = benchmark_silverkite_template(
        data_name=data_name,
        df=births_df,
        metadata=metadata,
        evaluation_metric=evaluation_metric,
        forecast_horizons=horizon_options,
        fit_algorithms=algorithm_options,
        max_cvs=cv_options)

    # only the first result is inspected
    first_result = benchmark_results[0]
    expected_fields = {
        "data_name": data_name,
        "forecast_model_name": "silverkite_linear",
        "train_period": births_df.shape[0],
        "forecast_horizon": 30,
        "cv_folds": 3,
    }
    for field, expected_value in expected_fields.items():
        assert first_result[field] == expected_value
示例#5
0
def test_estimator_plot_components_from_forecaster():
    """Tests the estimator's ``plot_components`` after ``Forecaster`` has run a full forecast config."""
    # Real data (female births), forecast through the model template
    loader = DataLoader()
    daily_dir = loader.get_data_home(data_sub_dir="daily")
    births_df = loader.get_df(data_path=daily_dir, data_name="daily_female_births")

    seasonality = {
        "yearly_seasonality": True,
        "quarterly_seasonality": True,
        "weekly_seasonality": True,
        "daily_seasonality": False
    }
    forecast_config = ForecastConfig(
        model_template=ModelTemplateEnum.SILVERKITE.name,
        forecast_horizon=30,  # forecast 1 month
        coverage=0.95,  # 95% prediction intervals
        metadata_param=MetadataParam(time_col="Date", value_col="Births", freq="D"),
        model_components_param=ModelComponentsParam(seasonality=seasonality))
    result = Forecaster().run_forecast_config(df=births_df, config=forecast_config)

    # The estimator is the last element of the last step of the fitted model
    final_step = result.model.steps[-1]
    estimator = final_step[-1]
    assert estimator.plot_components()