Пример #1
0
def test_gapfilling_forward_ridge_correct():
    """
    Check that forward gap filling with a ridge chain restores the gaps of
    the synthetic series with an RMSE below 1.0.
    """
    arr_with_gaps, real_values = get_array_with_gaps()

    # Find all gap indices in the array (gaps are marked with -100.0)
    id_gaps = np.ravel(np.argwhere(arr_with_gaps == -100.0))

    ridge_chain = get_simple_ts_chain(model_root='ridge')
    gapfiller = ModelGapFiller(gap_value=-100.0, chain=ridge_chain)
    without_gap = gapfiller.forward_filling(arr_with_gaps)

    # Get only values in the gaps
    predicted_values = without_gap[id_gaps]
    true_values = real_values[id_gaps]

    # Take the square root explicitly instead of passing squared=False:
    # that keyword was deprecated and later removed from scikit-learn, while
    # sqrt(MSE) gives the same RMSE for these 1-D values on every version.
    rmse_test = np.sqrt(mean_squared_error(true_values, predicted_values))

    assert rmse_test < 1.0
Пример #2
0
def test_gapfilling_forward_ridge_correct():
    """
    Check that forward gap filling with a single-node ridge forecasting chain
    restores the gaps of the synthetic series with an RMSE below 0.2.
    """
    arr_with_gaps, real_values = get_array_with_gaps()

    # Find all gap indices in the array (gaps are marked with -100.0)
    id_gaps = np.ravel(np.argwhere(arr_with_gaps == -100.0))

    ridge_chain = TsForecastingChain(PrimaryNode('ridge'))
    gapfiller = ModelGapFiller(gap_value=-100.0, chain=ridge_chain,
                               max_window_size=150)
    without_gap = gapfiller.forward_filling(arr_with_gaps)

    # Get only values in the gaps
    predicted_values = without_gap[id_gaps]
    true_values = real_values[id_gaps]

    # Take the square root explicitly instead of passing squared=False:
    # that keyword was deprecated and later removed from scikit-learn, while
    # sqrt(MSE) gives the same RMSE for these 1-D values on every version.
    rmse_test = np.sqrt(mean_squared_error(true_values, predicted_values))

    # The RMSE must be less than the standard deviation of random noise * 2.0
    assert rmse_test < 0.2
Пример #3
0
def run_gapfilling_example():
    """
    Fill the gaps of a synthetic time series with four different methods:
    a lagged + ridge chain and three simple approximations.

    :return arrays_dict: dictionary with 4 keys ('ridge', 'local_poly',
    'batch_poly', 'linear'), each mapping to the array returned by the
    corresponding gap-filling method
    :return gap_data: the array with gaps (gaps are marked with -100.0)
    :return real_data: the second array returned by get_array_with_gaps
    (the actual values, including those hidden by the gaps)
    """
    gap_marker = -100.0

    # Synthetic series: one copy with gaps, one with the real values
    gap_data, real_data = get_array_with_gaps()

    # Model-based filling: lagged transformation (window of 100) feeding
    # a ridge regression node, applied with the forward-inverse strategy
    lagged = PrimaryNode('lagged')
    lagged.custom_params = {'window_size': 100}
    chain = Chain(SecondaryNode('ridge', nodes_from=[lagged]))
    model_filler = ModelGapFiller(gap_value=gap_marker, chain=chain)
    arrays_dict = {
        'ridge': model_filler.forward_inverse_filling(gap_data),
    }

    # Simple methods; the (4, 150) arguments are passed through unchanged
    # NOTE(review): presumably polynomial degree and number of known
    # points used for the fit -- confirm against SimpleGapFiller
    simple_filler = SimpleGapFiller(gap_value=gap_marker)
    arrays_dict['local_poly'] = \
        simple_filler.local_poly_approximation(gap_data, 4, 150)
    arrays_dict['batch_poly'] = \
        simple_filler.batch_poly_approximation(gap_data, 4, 150)
    arrays_dict['linear'] = simple_filler.linear_interpolation(gap_data)

    return arrays_dict, gap_data, real_data
Пример #4
0
def run_gapfilling_case(file_path):
    """
    Fill the gaps of a time series stored in a csv file with two model-based
    approaches. Real data case (air temperature).

    :param file_path: path to the csv file, relative to the project root;
    the file must contain the columns 'date' and 'with_gap'
    :return: pandas dataframe read from the file with the 'date' column
    converted to datetime and two columns added: 'ridge' and 'composite'
    """

    # Read the table and parse the dates
    csv_path = os.path.join(str(project_root()), file_path)
    dataframe = pd.read_csv(csv_path)
    dataframe['date'] = pd.to_datetime(dataframe['date'])

    # Simple chain, filling with the forward-inverse strategy
    simple_filler = ModelGapFiller(gap_value=-100.0,
                                   chain=get_simple_chain())
    series_with_gaps = np.array(dataframe['with_gap'])
    dataframe['ridge'] = simple_filler.forward_inverse_filling(
        series_with_gaps)

    # Composite chain, filling with the forward strategy only
    composite_filler = ModelGapFiller(gap_value=-100.0,
                                      chain=get_composite_chain())
    dataframe['composite'] = composite_filler.forward_filling(
        series_with_gaps)

    return dataframe