Exemplo n.º 1
0
def test_config_variations_3():
    """Predict on a generated frame with an hourly time column and optimization on.

    Builds a two-column numeric frame plus a ``time`` column, pushes one
    particular combination of settings into ``config`` and checks the
    ``predictions`` of ``predictit.predict()`` with ``validate_result``.
    """
    frame = pd.DataFrame([range(300), range(1000, 1300)]).T
    frame["time"] = pd.date_range("2018-01-01", periods=len(frame), freq="H")

    settings = {
        "data": frame,
        "datetime_column": "time",
        "freq": "",
        "predicted_column": False,
        "datalength": 100,
        "other_columns": True,
        "analyzeit": 2,
        "default_other_columns_length": 5,
        "data_transform": None,
        "repeatit": 10,
        "error_criterion": "max_error",
        "multiprocessing": None,
        "remove_outliers": None,
        "remove_nans_threshold": 0.8,
        "remove_nans_or_replace": "mean",
        "optimization": True,
        "optimization_variable": "default_n_steps_in",
        "optimization_values": [3, 6, 9],
        "plot_all_optimized_models": True,
        "correlation_threshold": 0.4,
        "standardizeit": "standardize",
        "predicts": 7,
        "smoothit": None,
    }
    config.update(settings)

    predictions = predictit.predict().predictions
    assert validate_result(predictions)
def test_code_259():
    """Run ``predictit.predict`` on a remote CSV with a chosen subset of models.

    The configuration names the models to compute and supplies explicit
    parameters for several of them. The prediction result is not checked.
    """
    # Models that will be computed - drop this setting to use all the models
    selected_models = [
        "AR",
        "ARIMA",
        "LNU",
        "Conjugate gradient",
        "Sklearn regression",
        "Bayes ridge regression one column one step",
        "Decision tree regression",
    ]

    # Parameters handed to the individual models
    parameters_by_model = {
        "AR": {
            "used_model": "ar",
            "method": "cmle",
            "trend": "nc",
            "solver": "lbfgs",
        },
        "ARIMA": {
            "used_model": "arima",
            "p": 6,
            "d": 0,
            "q": 0,
        },
        "LNU": {
            "learning_rate": "infer",
            "epochs": 10,
            "w_predict": 0,
            "normalize_learning_rate": False,
        },
        "Conjugate gradient": {"epochs": 200},
        "Bayes ridge regression": {
            "model": "BayesianRidge",
            "n_iter": 300,
            "alpha_1": 1.0e-6,
            "alpha_2": 1.0e-6,
            "lambda_1": 1.0e-6,
            "lambda_2": 1.0e-6,
        },
    }

    settings = {
        # Full CSV path with suffix
        "data": r"https://raw.githubusercontent.com/jbrownlee/Datasets/master/daily-min-temperatures.csv",
        # Name of the column that we want to predict
        "predicted_column": "Temp",
        "datalength": 200,
        # Number of predicted values
        "predicts": 7,
        # How many times the calculation is repeated on shifted data to evaluate the error criterion
        "repeatit": 50,
        # Whether to use other columns or not
        "other_columns": False,
        "used_models": selected_models,
        "models_parameters": parameters_by_model,
    }
    config.update(settings)

    configured_result = predictit.predict()
def test_code_140():
    """Call ``predictit.compare_models`` on two halves of a random array.

    The result of the comparison is not checked.
    """
    samples = np.random.randn(200, 2)  # Stand-in for real data
    first_half, second_half = samples[:100], samples[100:]

    # Each entry pairs a data slice with an integer (0 for the first, 1 for the second)
    datasets = {
        "First part": (first_half, 0),
        "Second part": (second_half, 1),
    }

    config.update({"data_all": datasets, "predicted_column": 0})
    comparison = predictit.compare_models()
def test_code_88():
    """Adjust three basic settings and call ``predictit.predict``.

    The returned value is not checked.
    """
    basic_settings = {
        # Used datalength
        "datalength": 300,
        # Number of predicted values
        "predicts": 14,
        # Value of recursive inputs in model (do not use too high - slower and worse predictions)
        "default_n_steps_in": 12,
    }
    config.update(basic_settings)

    # Once the configuration is set up as needed, predicting is a single call
    forecast = predictit.predict()
Exemplo n.º 5
0
def test_compare_models_with_optimization():
    """Check that ``predictit.compare_models`` returns a truthy value with optimization on.

    ``data_all`` is reset to ``None`` and the optimization varies
    ``data_transform`` over ``None`` and ``"difference"``.
    """
    optimization_settings = {
        "data_all": None,
        "optimization": True,
        "optimization_variable": "data_transform",
        "optimization_values": [None, "difference"],
    }
    config.update(optimization_settings)

    comparison = predictit.compare_models()

    assert comparison
Exemplo n.º 6
0
def test_config_variations_2():
    """Predict on ``"test_random"`` data with a single model and optimizeit switched on.

    After the bulk update, ``optimizeit_limit`` is overridden through the
    ``config.hyperparameter_optimization`` attribute, and the resulting
    ``predictions`` are checked with ``validate_result``.
    """
    bayes = "Bayes ridge regression"

    model_parameters = {
        bayes: {
            "model": "BayesianRidge",
            "n_iter": 300,
            "alpha_1": 1.0e-6,
            "alpha_2": 1.0e-6,
            "lambda_1": 1.0e-6,
            "lambda_2": 1.0e-6,
        }
    }
    parameter_limits = {
        bayes: {
            "alpha_1": [0.1e-6, 3e-6],
            "model": ["BayesianRidge", "LassoRegression", "LinearRegression"],
        }
    }

    settings = {
        "data": "test_random",
        "predicted_column": "",
        "predicts": 4,
        "analyzeit": 1,
        "datetime_column": "",
        "freq": "M",
        "datalength": 100,
        "default_n_steps_in": 3,
        "data_transform": "difference",
        "error_criterion": "rmse",
        "remove_outliers": 3,
        "print_number_of_models": None,
        "print_table": "detailed",
        "plot_library": "matplotlib",
        "show_plot": True,
        "print_time_table": True,
        "correlation_threshold": 0.2,
        "data_extension": None,
        "optimizeit": True,
        "optimizeit_limit": 0.1,
        "optimizeit_details": 2,
        "optimizeit_plot": True,
        "standardizeit": None,
        "multiprocessing": "pool",
        "confidence_interval": None,
        "trace_processes_memory": True,
        "used_models": [bayes],
        "models_parameters": model_parameters,
        "fragments": 4,
        "iterations": 2,
        "models_parameters_limits": parameter_limits,
    }
    config.update(settings)

    # Set directly on the sub-config after the update above, which passed 0.1
    config.hyperparameter_optimization.optimizeit_limit = 10

    predictions = predictit.predict().predictions
    assert validate_result(predictions)
Exemplo n.º 7
0
def test_most_models():
    """Predict with a long list of models and allow at most two missing values.

    The input frame has two columns: a 0..199 ramp plus uniform random
    values, and the same ramp plus the output of
    ``mydatapreprocessing.generate_data.sin(200)``.
    """
    ramp = np.array(range(200))
    noisy_ramp = ramp + np.random.rand(200)
    sine_ramp = ramp + mydatapreprocessing.generate_data.sin(200)
    frame = pd.DataFrame([noisy_ramp, sine_ramp]).T

    models = [
        "ARMA",
        "ARIMA",
        "autoreg",
        "SARIMAX",
        "LNU",
        "LNU normalized",
        "LNU with weights predicts",
        "Sklearn regression",
        "Bayes ridge regression",
        "Passive aggressive regression",
        "Gradient boosting",
        "KNeighbors regression",
        "Decision tree regression",
        "Hubber regression",
        "Bagging regression",
        "Stochastic gradient regression",
        "Extreme learning machine",
        "Gen Extreme learning machine",
        "Extra trees regression",
        "Random forest regression",
        "Tensorflow LSTM",
        "Tensorflow MLP",
        "Average short",
        "Average long",
        "Regression",
        "Ridge regression",
    ]

    settings = {
        "data": frame,
        "predicts": 7,
        "datalength": 200,
        "default_n_steps_in": 3,
        "error_criterion": "mape",
        "used_models": models,
    }
    config.update(settings)

    outcome = predictit.predict()

    # Total count of null cells across the whole predictions table
    missing_values = outcome.predictions.isnull().sum().sum()
    assert missing_values <= 2
def test_code_162():
    """Run ``predictit.predict`` on a remote CSV while optimizing ``default_n_steps_in``.

    Only two models are used and the detailed table is requested. The
    returned value is not checked.
    """
    source_url = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/daily-min-temperatures.csv"

    settings = {
        "data": source_url,
        "predicted_column": "Temp",
        "datalength": 120,
        "optimization": True,
        "optimization_variable": "default_n_steps_in",
        "optimization_values": [4, 6, 8],
        "plot_all_optimized_models": False,
        # Print detailed table
        "print_table": "detailed",
        "print_result_details": True,
        "used_models": ["AR", "Sklearn regression"],
    }
    config.update(settings)

    optimized_result = predictit.predict()
Exemplo n.º 9
0
def test_config_variations_1():
    """Predict on a remote daily CSV with data extension, smoothing and plotly selected.

    Pushes one particular combination of settings into ``config`` and
    checks the ``predictions`` of ``predictit.predict()`` with
    ``validate_result``.
    """
    source_url = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/daily-min-temperatures.csv"

    extension_options = {
        "differences": True,
        "second_differences": True,
        "multiplications": True,
        "rolling_means": 10,
        "rolling_stds": 10,
        "mean_distances": True,
    }
    decompose_options = {"period": 32, "model": "additive"}

    settings = {
        "embedding": "label",
        "plot_library": "plotly",
        "data": source_url,
        "predicted_column": "Temp",
        "datetime_column": "Date",
        "freq": "D",
        "error_criterion": "dtw",
        "max_imported_length": 300,
        "remove_nans_threshold": 0.85,
        "remove_nans_or_replace": "neighbor",
        "trace_processes_memory": False,
        "print_number_of_models": 10,
        "add_fft_columns": 16,
        "data_extension": extension_options,
        "analyzeit": 3,
        "correlation_threshold": 0.2,
        "optimizeit": False,
        "standardizeit": "01",
        "multiprocessing": "process",
        "smoothit": (19, 2),
        "power_transformed": True,
        "analyze_seasonal_decompose": decompose_options,
        "confidence_interval": 0.6,
    }
    config.update(settings)

    predictions = predictit.predict().predictions
    assert validate_result(predictions)
Exemplo n.º 10
0
def test_presets():
    """Check that prediction yields valid results under the "fast" and "normal" presets.

    The data source is a remote JSON document. For each preset the
    configuration is updated, ``predictit.predict()`` is run again and its
    ``predictions`` are checked with ``validate_result``.
    """
    config.update({
        "data":
        "https://www.ncdc.noaa.gov/cag/global/time-series/globe/land_ocean/ytd/12/1880-2016.json",
        "request_datatype_suffix": ".json",
        "predicted_table": "data",
        "data_orientation": "index",
        "predicted_column": 0,
        "datalength": 100,
    })

    config.update({"use_config_preset": "fast"})
    fast_result = predictit.predict().predictions
    assert validate_result(fast_result)

    config.update({"use_config_preset": "normal"})
    # Predict again: reusing the "fast" result here would leave the "normal"
    # preset completely untested.
    normal_result = predictit.predict().predictions
    assert validate_result(normal_result)