def test_long_format_numerical(self, long_df_numerical, params, recipe_config, threshold_dict, datetime_column):
     """Check ``compute`` on ``long_df_numerical`` when grouping by ``country``.

     The resulting ``Date`` column must hold the four expected timestamps and
     the ``country`` column must equal ``[1, 1, 2, 2]``.
     """
     restrictor = IntervalRestrictor(params)
     result = restrictor.compute(
         long_df_numerical,
         datetime_column,
         threshold_dict,
         groupby_columns=["country"],
     )

     expected_dates = pd.DatetimeIndex([
         '1959-01-01T00:00:00.000000000',
         '1959-01-02T00:00:00.000000000',
         '1959-01-02T00:00:00.000000000',
         '1959-01-03T00:00:00.000000000',
     ])
     np.testing.assert_array_equal(result.Date.values, expected_dates)

     expected_countries = np.array([1, 1, 2, 2])
     np.testing.assert_array_equal(result.country.values, expected_countries)
 def test_zero_deviation_without_1st_row(self, edge_df_without_1st_row, config, threshold_dict, datetime_column):
     """Check the first and last output rows for ``edge_df_without_1st_row``.

     Rows labelled 0 and 3 must carry 2020-07-03 and 2020-07-08 respectively,
     and the output must contain exactly four rows.
     """
     restrictor = IntervalRestrictor(get_interval_restriction_params(config))
     result = restrictor.compute(edge_df_without_1st_row, datetime_column, threshold_dict)

     expected_by_label = ((0, "2020-07-03"), (3, "2020-07-08"))
     for row_label, expected_date in expected_by_label:
         assert result.loc[row_label, datetime_column] == pd.Timestamp(expected_date)
     assert len(result.index) == 4
 def test_empty_identifiers(self, df, params, recipe_config, threshold_dict, datetime_column):
     """Check the output shape when no grouping columns are supplied.

     ``compute`` is invoked with an empty ``groupby_columns`` list, with the
     argument omitted, and with an explicit ``None``; every call must return
     a frame of shape ``(4, 5)``.
     """
     restrictor = IntervalRestrictor(params)
     grouping_variants = (
         {"groupby_columns": []},
         {},
         {"groupby_columns": None},
     )
     for extra_kwargs in grouping_variants:
         result = restrictor.compute(df, datetime_column, threshold_dict, **extra_kwargs)
         assert result.shape == (4, 5)
 def test_mix_identifiers(self, long_df_4, params, recipe_config, threshold_dict, datetime_column):
     """Check ``compute`` on ``long_df_4`` grouped by country, item and store.

     The ``Date`` column of the output must match the seven expected
     timestamps, in order.
     """
     identifier_columns = ["country", "item", "store"]
     restrictor = IntervalRestrictor(params)
     result = restrictor.compute(
         long_df_4,
         datetime_column,
         threshold_dict,
         groupby_columns=identifier_columns,
     )

     expected_dates = pd.DatetimeIndex([
         '2020-01-31T00:00:00.000000000',
         '2020-02-29T00:00:00.000000000',
         '2020-02-29T00:00:00.000000000',
         '2020-01-31T00:00:00.000000000',
         '2020-01-31T00:00:00.000000000',
         '2020-02-29T00:00:00.000000000',
         '2020-02-29T00:00:00.000000000',
     ])
     np.testing.assert_array_equal(result.Date.values, expected_dates)
 def test_zero_deviation_edges(self, edge_df, config, threshold_dict, datetime_column):
     """Check that the first and last rows of ``edge_df`` are handled.

     Regression test for [ch54733]: the output must have seven rows, starting
     at 2020-07-01 (label 0) and ending at 2020-07-12 (label 6).
     """
     restrictor = IntervalRestrictor(get_interval_restriction_params(config))
     result = restrictor.compute(edge_df, datetime_column, threshold_dict)

     assert len(result.index) == 7
     edge_expectations = ((0, "2020-07-01"), (6, "2020-07-12"))
     for row_label, expected_date in edge_expectations:
         assert result.loc[row_label, datetime_column] == pd.Timestamp(expected_date)
示例#6
0
 def test_day(self, config, threshold_dict, columns):
     """Check ``compute`` with ``time_unit`` set to ``"days"``.

     The input frame comes from ``get_df_DST("W", columns)``; the date column
     of the output must equal the four expected timestamps.
     """
     # Override the time unit on the config before deriving the parameters.
     config["time_unit"] = "days"
     params = get_interval_restriction_params(config)
     input_df = get_df_DST("W", columns)
     result = IntervalRestrictor(params).compute(input_df, columns.date, threshold_dict)

     expected_dates = pd.DatetimeIndex([
         '2019-02-03T00:59:00.000000000',
         '2019-02-10T00:59:00.000000000',
         '2019-02-17T00:59:00.000000000',
         '2019-02-24T00:59:00.000000000',
     ])
     actual_dates = result[columns.date].values
     np.testing.assert_array_equal(expected_dates, actual_dates)
示例#7
0
 def test_microseconds(self, config, threshold_dict, columns):
     """Check ``compute`` with ``time_unit`` set to ``"microseconds"``.

     The input frame comes from ``get_df_DST("U", columns)``; the date column
     of the output must equal the four expected timestamps and every
     ``interval_id`` value must be the string ``"0"``.
     """
     # Override the time unit on the config before deriving the parameters.
     config["time_unit"] = "microseconds"
     params = get_interval_restriction_params(config)
     input_df = get_df_DST("U", columns)
     result = IntervalRestrictor(params).compute(input_df, columns.date, threshold_dict)

     expected_dates = pd.DatetimeIndex([
         '2019-01-31T00:59:00.000000000',
         '2019-01-31T00:59:00.000001000',
         '2019-01-31T00:59:00.000002000',
         '2019-01-31T00:59:00.000003000',
     ])
     actual_dates = result[columns.date].values
     np.testing.assert_array_equal(expected_dates, actual_dates)

     interval_ids = result["interval_id"].values
     assert np.all(interval_ids == "0")
    def test_zero_deviation_annual_edges(self, annual_edge_df, config, threshold_dict, datetime_column):
        """Check edge handling on ``annual_edge_df``.

        Two things are verified:

        1. ``_initialize_edges`` applied to a datetime-indexed, sorted copy of
           the fixture returns a frame whose index starts at 2010-12-31 and
           ends at 2022-01-02.
        2. ``compute`` on the untouched fixture returns seven rows, the first
           (label 0) dated 2011-01-01 and the last (label 6) dated 2022-01-01.
        """
        params = get_interval_restriction_params(config)
        interval_restrictor = IntervalRestrictor(params)

        # Work on a copy so the fixture passed to ``compute`` below is unchanged.
        df_test = annual_edge_df.copy()
        # Plain column assignment replaces the column and therefore its dtype.
        # ``df.loc[:, col] = ...`` sets values in place on pandas >= 2.0, which
        # would leave an object-dtype column as object instead of datetime64.
        df_test[datetime_column] = pd.to_datetime(df_test[datetime_column])
        df_test = df_test.set_index(datetime_column).sort_index()

        df_initialized = interval_restrictor._initialize_edges(df_test)
        assert df_initialized.index[0] == pd.Timestamp("2010-12-31")
        assert df_initialized.index[-1] == pd.Timestamp("2022-01-02")

        output_df = interval_restrictor.compute(annual_edge_df, datetime_column, threshold_dict)
        assert len(output_df.index) == 7
        assert output_df.loc[0, datetime_column] == pd.Timestamp("2011-01-01")
        assert output_df.loc[6, datetime_column] == pd.Timestamp("2022-01-01")
示例#9
0
    format='timeseries-preparation plugin %(levelname)s - %(message)s')

check_python_version()

# --- Setup
# Resolve the recipe's input/output datasets and its configuration.
input_dataset, output_dataset = get_input_output()
recipe_config = get_recipe_config()

# Column names taken from the input dataset schema; used to validate the config.
input_dataset_columns = [
    schema_entry["name"] for schema_entry in input_dataset.read_schema()
]
check_time_column_parameter(recipe_config, input_dataset_columns)

datetime_column = recipe_config.get("datetime_column")
value_column = recipe_config.get("value_column")
min_threshold = recipe_config.get("min_threshold")
max_threshold = recipe_config.get("max_threshold")

# Maps the value column to its (min, max) threshold pair.
threshold_dict = {value_column: (min_threshold, max_threshold)}
groupby_columns = check_and_get_groupby_columns(
    recipe_config, input_dataset_columns
)
params = get_interval_restriction_params(recipe_config)

# --- Run
df = input_dataset.get_dataframe()
interval_restrictor = IntervalRestrictor(params)
output_df = interval_restrictor.compute(
    df,
    datetime_column,
    threshold_dict,
    groupby_columns=groupby_columns,
)

# --- Write output
output_dataset.write_with_schema(output_df)
 def test_segment_beginning(self, edge_df_segment, config, threshold_dict, datetime_column):
     """Check the start of the output for ``edge_df_segment``.

     The first four ``interval_id`` values must be the string ``"0"`` and the
     row labelled 0 must be dated 2020-07-01.
     """
     restrictor = IntervalRestrictor(get_interval_restriction_params(config))
     result = restrictor.compute(edge_df_segment, datetime_column, threshold_dict)

     leading_interval_ids = result.interval_id.values[:4]
     assert np.all(leading_interval_ids == "0")
     assert result.loc[0, datetime_column] == pd.Timestamp("2020-07-01")