Пример #1
0
def test_config_variations_3():
    """Predict from an in-memory DataFrame with variable optimization switched on.

    The input has 300 rows: two integer columns plus an hourly ``time`` column
    that is registered as ``datetime_column``. ``optimization`` is enabled over
    ``default_n_steps_in`` with the candidate values ``[3, 6, 9]``. The test
    passes when ``validate_result`` accepts the returned predictions.
    """
    frame = pd.DataFrame([range(300), range(1000, 1300)]).T
    frame["time"] = pd.date_range("2018-01-01", periods=len(frame), freq="H")

    settings = {
        "data": frame,
        "datetime_column": "time",
        "freq": "",
        "predicted_column": False,
        "datalength": 100,
        "other_columns": True,
        "analyzeit": 2,
        "default_other_columns_length": 5,
        "data_transform": None,
        "repeatit": 10,
        "error_criterion": "max_error",
        "multiprocessing": None,
        "remove_outliers": None,
        "remove_nans_threshold": 0.8,
        "remove_nans_or_replace": "mean",
        "optimization": True,
        "optimization_variable": "default_n_steps_in",
        "optimization_values": [3, 6, 9],
        "plot_all_optimized_models": True,
        "correlation_threshold": 0.4,
        "standardizeit": "standardize",
        "predicts": 7,
        "smoothit": None,
    }
    config.update(settings)

    outcome = predictit.predict()
    assert validate_result(outcome.predictions)
def test_code_259():
    """Smoke-test a hand-configured prediction on the daily minimum temperatures CSV.

    Nothing is asserted about the result: the return value of
    ``predictit.predict()`` is discarded and the test passes as long as the
    call does not raise.
    """
    # Chose models that will be computed - remove if you want to use all the models
    chosen_models = [
        "AR",
        "ARIMA",
        "LNU",
        "Conjugate gradient",
        "Sklearn regression",
        "Bayes ridge regression one column one step",
        "Decision tree regression",
    ]

    # Define parameters of models
    per_model_parameters = {
        "AR": {
            "used_model": "ar",
            "method": "cmle",
            "trend": "nc",
            "solver": "lbfgs",
        },
        "ARIMA": {
            "used_model": "arima",
            "p": 6,
            "d": 0,
            "q": 0,
        },
        "LNU": {
            "learning_rate": "infer",
            "epochs": 10,
            "w_predict": 0,
            "normalize_learning_rate": False,
        },
        "Conjugate gradient": {
            "epochs": 200
        },
        "Bayes ridge regression": {
            "model": "BayesianRidge",
            "n_iter": 300,
            "alpha_1": 1.0e-6,
            "alpha_2": 1.0e-6,
            "lambda_1": 1.0e-6,
            "lambda_2": 1.0e-6,
        },
    }

    config.update({
        # Full CSV path with suffix
        "data": r"https://raw.githubusercontent.com/jbrownlee/Datasets/master/daily-min-temperatures.csv",
        # Column name that we want to predict
        "predicted_column": "Temp",
        "datalength": 200,
        # Number of predicted values - 7 by default
        "predicts": 7,
        # Repeat calculation times on shifted data to evaluate error criterion
        "repeatit": 50,
        # Whether use other columns or not
        "other_columns": False,
        "used_models": chosen_models,
        "models_parameters": per_model_parameters,
    })

    predictit.predict()
def test_code_88():
    """Smoke-test ``predictit.predict()`` after adjusting three basic settings.

    Only ``datalength``, ``predicts`` and ``default_n_steps_in`` are updated;
    everything else is whatever ``config`` currently holds. The result is
    discarded, so the test passes when the call does not raise.
    """
    basic_settings = {
        # Used datalength
        "datalength": 300,
        # Number of predicted values
        "predicts": 14,
        # Value of recursive inputs in model (do not use too high - slower and worse predictions)
        "default_n_steps_in": 12,
    }
    config.update(basic_settings)

    # Once the configuration is set up as needed, predicting is a single call
    predictit.predict()
Пример #4
0
def test_config_variations_2():
    """Predict on the ``"test_random"`` data source with hyperparameter optimization on.

    Only the "Bayes ridge regression" model is used, with ``optimizeit``
    enabled and limits given for its ``alpha_1`` and ``model`` parameters.
    The test passes when ``validate_result`` accepts the returned predictions.
    """
    bayes_ridge_parameters = {
        "model": "BayesianRidge",
        "n_iter": 300,
        "alpha_1": 1.0e-6,
        "alpha_2": 1.0e-6,
        "lambda_1": 1.0e-6,
        "lambda_2": 1.0e-6,
    }
    bayes_ridge_limits = {
        "alpha_1": [0.1e-6, 3e-6],
        "model": ["BayesianRidge", "LassoRegression", "LinearRegression"],
    }

    settings = {
        "data": "test_random",
        "predicted_column": "",
        "predicts": 4,
        "analyzeit": 1,
        "datetime_column": "",
        "freq": "M",
        "datalength": 100,
        "default_n_steps_in": 3,
        "data_transform": "difference",
        "error_criterion": "rmse",
        "remove_outliers": 3,
        "print_number_of_models": None,
        "print_table": "detailed",
        "plot_library": "matplotlib",
        "show_plot": True,
        "print_time_table": True,
        "correlation_threshold": 0.2,
        "data_extension": None,
        "optimizeit": True,
        "optimizeit_limit": 0.1,
        "optimizeit_details": 2,
        "optimizeit_plot": True,
        "standardizeit": None,
        "multiprocessing": "pool",
        "confidence_interval": None,
        "trace_processes_memory": True,
        "used_models": ["Bayes ridge regression"],
        "models_parameters": {"Bayes ridge regression": bayes_ridge_parameters},
        "fragments": 4,
        "iterations": 2,
        "models_parameters_limits": {"Bayes ridge regression": bayes_ridge_limits},
    }
    config.update(settings)

    # Attribute-style assignment made after update().
    # NOTE(review): presumably this is the same setting as the "optimizeit_limit"
    # key above and therefore replaces the 0.1 - confirm against the config class.
    config.hyperparameter_optimization.optimizeit_limit = 10

    outcome = predictit.predict()
    assert validate_result(outcome.predictions)
Пример #5
0
def test_most_models():
    """Run a long list of models on two synthetic 200-sample series.

    Both series are the ramp ``0..199``: one with uniform random noise added,
    the other with the output of ``mydatapreprocessing.generate_data.sin(200)``
    added. The test tolerates at most two null values across the whole
    predictions table.
    """
    ramp = np.array(range(200))
    noisy_ramp = ramp + np.random.rand(200)
    wavy_ramp = ramp + mydatapreprocessing.generate_data.sin(200)
    frame = pd.DataFrame([noisy_ramp, wavy_ramp]).T

    models_under_test = [
        "ARMA",
        "ARIMA",
        "autoreg",
        "SARIMAX",
        "LNU",
        "LNU normalized",
        "LNU with weights predicts",
        "Sklearn regression",
        "Bayes ridge regression",
        "Passive aggressive regression",
        "Gradient boosting",
        "KNeighbors regression",
        "Decision tree regression",
        "Hubber regression",
        "Bagging regression",
        "Stochastic gradient regression",
        "Extreme learning machine",
        "Gen Extreme learning machine",
        "Extra trees regression",
        "Random forest regression",
        "Tensorflow LSTM",
        "Tensorflow MLP",
        "Average short",
        "Average long",
        "Regression",
        "Ridge regression",
    ]

    config.update({
        "data": frame,
        "predicts": 7,
        "datalength": 200,
        "default_n_steps_in": 3,
        "error_criterion": "mape",
        "used_models": models_under_test,
    })

    outcome = predictit.predict()
    null_count = outcome.predictions.isnull().sum().sum()
    assert null_count <= 2
Пример #6
0
def test_config_inputs():
    """Exercise the different ways of passing settings to ``predictit``.

    Four entry points are tried in turn: keyword arguments, positional
    ``(data, predicted_column)`` arguments, an explicit ``config`` object,
    and the command line via ``predictit/main.py``. All assertions are made
    at the end, after every call has completed.
    """
    # 1) Settings as keyword arguments
    from_kwargs = predictit.predict(predicts=3)

    # 2) Data (a list of two frames) and predicted column as positional arguments
    frames = [
        pd.DataFrame(np.random.randn(100, 3), columns=["a", "b", "c"]),
        pd.DataFrame(np.random.randn(80, 3), columns=["e", "b", "c"]),
    ]
    from_positional = predictit.predict(frames, "b")

    # 3) A copied config object handed over explicitly
    copied_config = config.copy()
    copied_config.output.predicts = 4
    from_config_object = predictit.predict(config=copied_config)

    # 4) Command line. The command is split on single spaces and run without a
    # shell, so the single quotes reach the script literally and the trailing
    # space yields one empty final argument.
    # NOTE(review): presumably main.py tolerates both - confirm before changing.
    quoted_url = "'https://raw.githubusercontent.com/jbrownlee/Datasets/master/daily-min-temperatures.csv'"
    command = f"python predictit/main.py --used_function predict --data {quoted_url} --predicted_column 'Temp' "
    cli_output = subprocess.check_output(command.split(" "))

    assert len(from_kwargs.best_prediction) == 3
    assert len(from_config_object.best_prediction) == 4
    assert validate_result(from_positional.predictions)
    assert "Best model is" in str(cli_output)
def test_code_162():
    """Smoke-test variable optimization on the daily minimum temperatures CSV.

    ``optimization`` is enabled over ``default_n_steps_in`` with the values
    ``[4, 6, 8]`` for the "AR" and "Sklearn regression" models. The result is
    discarded, so the test passes when ``predictit.predict()`` does not raise.
    """
    optimization_settings = {
        "data": "https://raw.githubusercontent.com/jbrownlee/Datasets/master/daily-min-temperatures.csv",
        "predicted_column": "Temp",
        "datalength": 120,
        "optimization": True,
        "optimization_variable": "default_n_steps_in",
        "optimization_values": [4, 6, 8],
        "plot_all_optimized_models": False,
        # Print detailed table
        "print_table": "detailed",
        "print_result_details": True,
        "used_models": ["AR", "Sklearn regression"],
    }
    config.update(optimization_settings)

    predictit.predict()
Пример #8
0
def test_presets():
    """Check that prediction works under each tested ``use_config_preset`` value.

    The data source is a remote JSON document; the remaining data settings
    are applied once, and then ``predictit.predict()`` is executed separately
    for the "fast" and the "normal" preset. Each run's predictions must be
    accepted by ``validate_result``.
    """
    config.update({
        "data":
        "https://www.ncdc.noaa.gov/cag/global/time-series/globe/land_ocean/ytd/12/1880-2016.json",
        "request_datatype_suffix": ".json",
        "predicted_table": "data",
        "data_orientation": "index",
        "predicted_column": 0,
        "datalength": 100,
    })

    # Predict once per preset. Previously the "normal" preset was only written
    # to the config and the stale "fast" result was validated a second time,
    # so the "normal" preset was never actually exercised.
    for preset in ("fast", "normal"):
        config.update({"use_config_preset": preset})
        preset_result = predictit.predict().predictions
        assert validate_result(preset_result), f"Invalid predictions for preset '{preset}'"
Пример #9
0
def test_config_variations_1():
    """Predict on the daily minimum temperatures CSV with many optional features on.

    The configuration sets, among others, ``data_extension`` (derived
    columns), ``add_fft_columns``, ``smoothit``, ``power_transformed``,
    ``analyze_seasonal_decompose`` and a ``confidence_interval``. The test
    passes when ``validate_result`` accepts the returned predictions.
    """
    derived_columns = {
        "differences": True,
        "second_differences": True,
        "multiplications": True,
        "rolling_means": 10,
        "rolling_stds": 10,
        "mean_distances": True,
    }
    seasonal_decompose_options = {
        "period": 32,
        "model": "additive"
    }

    settings = {
        "embedding": "label",
        "plot_library": "plotly",
        "data": "https://raw.githubusercontent.com/jbrownlee/Datasets/master/daily-min-temperatures.csv",
        "predicted_column": "Temp",
        "datetime_column": "Date",
        "freq": "D",
        "error_criterion": "dtw",
        "max_imported_length": 300,
        "remove_nans_threshold": 0.85,
        "remove_nans_or_replace": "neighbor",
        "trace_processes_memory": False,
        "print_number_of_models": 10,
        "add_fft_columns": 16,
        "data_extension": derived_columns,
        "analyzeit": 3,
        "correlation_threshold": 0.2,
        "optimizeit": False,
        "standardizeit": "01",
        "multiprocessing": "process",
        "smoothit": (19, 2),
        "power_transformed": True,
        "analyze_seasonal_decompose": seasonal_decompose_options,
        "confidence_interval": 0.6,
    }
    config.update(settings)

    outcome = predictit.predict()
    assert validate_result(outcome.predictions)
Пример #10
0
def test_default_config_and_outputs():
    """Reset the config and sanity-check every output of a default ``predict()`` run.

    Checked outputs: ``predictions``, ``best_prediction`` and ``results_df``
    (via ``validate_result``), the three rendered tables (each must be a
    string of at least 20 characters) and the last row of ``with_history``.
    """
    config.reset()

    outcome = predictit.predict()

    # A table counts as valid only if it is a string with at least 20 characters
    rendered_tables = (
        outcome.tables.detailed,
        outcome.tables.simple,
        outcome.tables.time,
    )
    tables_are_valid = all(
        isinstance(table, str) and len(table) >= 20 for table in rendered_tables
    )

    assert validate_result(outcome.predictions)
    assert validate_result(outcome.best_prediction)
    assert validate_result(outcome.results_df)
    assert tables_are_valid

    # Last row of the history frame: the "Predicted column" entry is NaN while
    # the last column holds a value.
    # NOTE(review): presumably the last row is a forecast step with no observed
    # value - confirm against how with_history is built.
    last_observed = outcome.with_history["Predicted column"].iloc[-1]
    last_in_final_column = outcome.with_history.iloc[-1, -1]
    assert np.isnan(last_observed)
    assert not np.isnan(last_in_final_column)
def test_code_118():
    """Smoke-test ``predict`` called with positional ``(data, predicted_column)`` arguments.

    The data is a random 100x2 DataFrame with columns "a" and "b"; column "b"
    is predicted. The result is discarded, so only a raised exception fails
    the test.
    """
    random_frame = pd.DataFrame(np.random.randn(100, 2), columns=["a", "b"])
    predictit.predict(random_frame, "b")
def test_code_112():
    """Smoke-test ``predict`` called with keyword arguments on a raw NumPy array.

    A random 100x2 array is passed as ``data`` with ``predicted_column=1`` and
    ``predicts=3``. The result is discarded, so only a raised exception fails
    the test.
    """
    random_values = np.random.randn(100, 2)
    predictit.predict(data=random_values, predicted_column=1, predicts=3)
def test_code_102():
    """Smoke-test ``predict`` called with a separate, modified copy of the config.

    The result is discarded, so only a raised exception fails the test.
    """
    # A copy is used (predictit.configuration.Config() would also work) so the
    # change below will not affect the config used by other examples.
    independent_config = config.copy()
    independent_config.predicts = 30
    predictit.predict(config=independent_config)