def test_gapfilling_forward_ridge_correct():
    """Check forward gap-filling with a ridge chain on the test array.

    Gaps are marked with -100.0. They are filled with
    ``ModelGapFiller.forward_filling`` and the RMSE between the restored and
    the actual values, computed only at the gap positions, must be below 1.0.
    """
    # NOTE(review): another function with this exact name is visible in the
    # same source; if both live in one module, the later definition replaces
    # this one and this test is never collected - confirm and rename one.
    gap_value = -100.0
    arr_with_gaps, real_values = get_array_with_gaps()

    # Find all gap indices in the array
    id_gaps = np.ravel(np.argwhere(arr_with_gaps == gap_value))

    ridge_chain = get_simple_ts_chain(model_root='ridge')
    gapfiller = ModelGapFiller(gap_value=gap_value, chain=ridge_chain)
    without_gap = gapfiller.forward_filling(arr_with_gaps)

    # Get only values in the gaps
    predicted_values = without_gap[id_gaps]
    true_values = real_values[id_gaps]

    # Take the square root explicitly instead of passing `squared=False`:
    # that keyword is deprecated in scikit-learn 1.4 and removed in 1.6,
    # while this form yields the same RMSE on every version.
    rmse_test = np.sqrt(mean_squared_error(true_values, predicted_values))

    assert rmse_test < 1.0
def test_gapfilling_forward_ridge_correct():
    """Check forward gap-filling with a single-node ridge forecasting chain.

    Gaps are marked with -100.0. They are filled with
    ``ModelGapFiller.forward_filling`` (created with ``max_window_size=150``)
    and the RMSE between the restored and the actual values, computed only at
    the gap positions, must be below 0.2.
    """
    # NOTE(review): another function with this exact name is visible in the
    # same source; if both live in one module, only the later definition is
    # collected by the test runner - confirm and rename one.
    gap_value = -100.0
    arr_with_gaps, real_values = get_array_with_gaps()

    # Find all gap indices in the array
    id_gaps = np.ravel(np.argwhere(arr_with_gaps == gap_value))

    ridge_chain = TsForecastingChain(PrimaryNode('ridge'))
    gapfiller = ModelGapFiller(gap_value=gap_value, chain=ridge_chain,
                               max_window_size=150)
    without_gap = gapfiller.forward_filling(arr_with_gaps)

    # Get only values in the gaps
    predicted_values = without_gap[id_gaps]
    true_values = real_values[id_gaps]

    # Take the square root explicitly instead of passing `squared=False`:
    # that keyword is deprecated in scikit-learn 1.4 and removed in 1.6,
    # while this form yields the same RMSE on every version.
    rmse_test = np.sqrt(mean_squared_error(true_values, predicted_values))

    # The RMSE must be less than the standard deviation of random noise * 2.0
    assert rmse_test < 0.2
def run_gapfilling_example():
    """
    Fill the gaps of a synthetic time series with several methods.

    A 'lagged' -> 'ridge' chain is applied through ModelGapFiller
    (forward_inverse_filling), while SimpleGapFiller supplies the local
    polynomial, batch polynomial and linear interpolation results.

    :return arrays_dict: dictionary with 4 keys ('ridge', 'local_poly',
    'batch_poly', 'linear') that can be used to get arrays without gaps
    :return gap_data: an array with gaps
    :return real_data: an array with actual values in gaps
    """
    gap_marker = -100.0
    # Both polynomial methods receive the same two positional arguments
    poly_args = (4, 150)

    # Synthetic time series: the version with gaps and the reference values
    gap_data, real_data = get_array_with_gaps()

    # Model-based filling: lagged node (window_size=100) feeding a ridge node
    lagged = PrimaryNode('lagged')
    lagged.custom_params = {'window_size': 100}
    chain = Chain(SecondaryNode('ridge', nodes_from=[lagged]))

    model_filler = ModelGapFiller(gap_value=gap_marker, chain=chain)
    ridge_filled = model_filler.forward_inverse_filling(gap_data)

    # Simple methods; the dict literal evaluates them in the listed order
    simple_filler = SimpleGapFiller(gap_value=gap_marker)
    arrays_dict = {
        'ridge': ridge_filled,
        'local_poly': simple_filler.local_poly_approximation(gap_data,
                                                             *poly_args),
        'batch_poly': simple_filler.batch_poly_approximation(gap_data,
                                                             *poly_args),
        'linear': simple_filler.linear_interpolation(gap_data),
    }
    return arrays_dict, gap_data, real_data
def run_gapfilling_case(file_path):
    """
    Run gap-filling on a real-data time series with air temperature.

    The CSV file is read relative to the project root, its 'date' column is
    parsed into datetimes, and the 'with_gap' column is restored twice: with
    a ridge chain (forward_inverse_filling) and with a composite chain
    (forward_filling). Both results are stored as new columns.

    :param file_path: path to the file
    :return: pandas dataframe with columns 'date','with_gap','ridge',
    'composite','temperature'
    """
    gap_marker = -100.0

    # Load the table and convert the date column
    csv_path = os.path.join(str(project_root()), file_path)
    dataframe = pd.read_csv(csv_path)
    dataframe['date'] = pd.to_datetime(dataframe['date'])

    # Series with gaps, copied into a standalone numpy array
    with_gap_array = np.array(dataframe['with_gap'])

    # Ridge chain: forward + inverse filling
    ridge_filler = ModelGapFiller(gap_value=gap_marker,
                                  chain=get_simple_chain())
    dataframe['ridge'] = ridge_filler.forward_inverse_filling(with_gap_array)

    # Composite chain (reportedly 5 models - see get_composite_chain):
    # forward filling only
    composite_filler = ModelGapFiller(gap_value=gap_marker,
                                      chain=get_composite_chain())
    dataframe['composite'] = composite_filler.forward_filling(with_gap_array)

    return dataframe