def test_custom_stat_report_for_customised_index(self):
     """Check the first index label of a ``custom_stat_report`` result.

     The report is built from a random two-column frame with
     ``name="given_name"``; its first index label is expected to be
     ``"given_name attr1"``.
     """
     frame = pd.DataFrame(data=np.random.rand(10, 2),
                          columns=["attr1", "attr2"])
     explorer = data_exploration.Exploration(data=frame)
     report = explorer.custom_stat_report(data=frame, name="given_name")
     first_label = report.index[0]
     self.assertEqual(first_label, "given_name attr1")
 def test_descriptive_stats_report_for_output_data_type(self):
     """Check that ``descriptive_stats_report`` returns a ``pd.DataFrame``.

     The comparison is on the exact type of the returned object.
     """
     frame = pd.DataFrame(data=np.random.rand(10, 2),
                          columns=["attr1", "attr2"])
     explorer = data_exploration.Exploration(data=frame)
     report = explorer.descriptive_stats_report(given_data=frame)
     self.assertEqual(type(report), pd.DataFrame)
 def test_distribution_comparison_for_3_pandas_Series(self):
     """Check that ``distribution_comparison`` returns ``None`` for 3 Series.

     Three columns of a random four-column frame are passed as the keyword
     arguments ``series1``, ``series2`` and ``series3``.
     """
     column_names = ["attr1", "attr2", "attr3", "attr4"]
     frame = pd.DataFrame(data=np.random.rand(10, 4), columns=column_names)
     explorer = data_exploration.Exploration(data=frame)
     result = explorer.distribution_comparison(series1=frame["attr1"],
                                               series2=frame["attr2"],
                                               series3=frame["attr3"])
     self.assertIsNone(result)
 def test_split_train_test_for_full_and_partial_mode(self):
     """Check train/test set sizes in "full" and "partial" running modes.

     For a 100-row frame the "full" mode split is expected to contain
     70 training and 30 test rows, and the "partial" mode split 5 and 2.
     """
     mock_df = pd.DataFrame(data=np.random.rand(100, 2),
                            columns=["attr1", "attr2"])
     mock_object_full = data_exploration.Exploration(data=mock_df,
                                                     running_mode="full")
     mock_object_partial = data_exploration.Exploration(
         data=mock_df, running_mode="partial")
     expected_full = [70, 30]
     expected_partial = [5, 2]

     # Split once per mode so that the train and test lengths asserted
     # below belong to the same partition (and the work is not repeated).
     full_split = mock_object_full.split_train_test()
     partial_split = mock_object_partial.split_train_test()

     self.assertEqual(len(full_split[0]), expected_full[0])
     self.assertEqual(len(full_split[1]), expected_full[1])
     self.assertEqual(len(partial_split[0]), expected_partial[0])
     self.assertEqual(len(partial_split[1]), expected_partial[1])
 def test_descriptive_stats_report_for_statistics_results(self):
     """Check the top-left cell of ``descriptive_stats_report``.

     The cell at position ``[0, 0]`` of the report is expected to equal
     the mean of the ``attr1`` column of the input frame.
     """
     mock_df = pd.DataFrame(data=np.random.rand(10, 2),
                            columns=["attr1", "attr2"])
     mock_object = data_exploration.Exploration(data=mock_df)
     expected = mock_df["attr1"].mean()
     report = mock_object.descriptive_stats_report(given_data=mock_df)
     # The data is random and the two means may be computed through
     # different code paths, so compare with a tolerance instead of
     # requiring bit-identical floats.
     self.assertAlmostEqual(report.iloc[0, 0], expected)
 def test_scaler_min_max(self):
     """Check ``scaler_min_max`` on the single-column input 10, 20, 30.

     The first column of the scaled output is expected to have a maximum
     of 1, a minimum of 0, and 0.5 for the middle input row.
     """
     mock_df = pd.DataFrame(data=[10, 20, 30], columns=["attr1"])
     mock_object = data_exploration.Exploration(data=mock_df)
     expected = [1, 0.5, 0]

     # Scale once and reuse the result for every assertion.
     scaled_column = mock_object.scaler_min_max(data=mock_df)[:, 0]

     self.assertAlmostEqual(scaled_column.max(), expected[0])
     # Compare the middle row with a tolerance, like the min/max checks,
     # instead of relying on exact float membership in the array.
     # NOTE(review): assumes the scaler keeps the input row order.
     self.assertAlmostEqual(scaled_column[1], expected[1])
     self.assertAlmostEqual(scaled_column.min(), expected[2])
 def test_plot_series_but_ignore_date_for_2_datasets(self):
     """Check that ``plot_series_but_ignore_date`` returns ``None``.

     Two random series sharing an hourly ``DatetimeIndex`` (4 to 15
     January 2021) are passed as ``series1`` and ``series2``.
     """
     hourly_index = pd.date_range(start="04-Jan-2021",
                                  end="15-Jan-2021",
                                  freq="H")
     sample_count = len(hourly_index)
     # attr1 is drawn from [1, 3) and attr2 from [3, 5), in that order.
     first_values = np.random.uniform(1, 3, sample_count)
     second_values = np.random.uniform(3, 5, sample_count)
     frame = pd.DataFrame(data=np.array([first_values, second_values]).T,
                          columns=["attr1", "attr2"],
                          index=hourly_index)
     explorer = data_exploration.Exploration(data=frame)
     result = explorer.plot_series_but_ignore_date(series1=frame["attr1"],
                                                   series2=frame["attr2"])
     self.assertIsNone(result)
 def test_apply_sliding_window_for_type_outcome_length_and_links_data_target(
         self):
     """Check window length, data/target linkage and equality of two runs.

     ``apply_sliding_window`` is run twice on the same array built from a
     random 200-row frame with columns ``attr1``, ``Close``, ``attr2``.
     The test asserts that:

     * the first and last windows have ``slid_win_length`` rows,
     * the last ``Close`` value of window ``i + 1`` equals target ``i``
       (checked for ``i = 0`` and ``i = 4``),
     * the first window of both runs holds the same values.
     """
     data_length = 200
     window_length = parameters.General_Params().slid_win_length
     mock_df = pd.DataFrame(data=np.random.rand(data_length, 3),
                            columns=["attr1", "Close", "attr2"])
     mock_object = data_exploration.Exploration(data=mock_df)
     # NOTE(review): both calls receive the same ndarray although the
     # variable names suggest one was meant to take the DataFrame itself;
     # confirm the intended inputs against apply_sliding_window.
     df_out_data, df_out_targets = mock_object.apply_sliding_window(
         scaled_data=np.array(mock_df))
     array_out_data, _ = mock_object.apply_sliding_window(
         scaled_data=np.array(mock_df))
     self.assertEqual(len(df_out_data[0]), window_length)
     self.assertEqual(len(df_out_data[-1]), window_length)
     self.assertEqual(df_out_data[1]["Close"].iloc[-1], df_out_targets[0])
     self.assertEqual(df_out_data[5]["Close"].iloc[-1], df_out_targets[4])
     # The windows are indexed like pandas objects above; iterating a
     # DataFrame yields its column labels, so ``all(a == b)`` would only
     # test the labels' truthiness. Compare the actual values instead.
     self.assertTrue(np.array_equal(df_out_data[0], array_out_data[0]))
Exemplo n.º 9
0
from tensorflow.keras.losses import MeanAbsoluteError, MeanSquaredError

from core import data_preparation, data_exploration, learning_rate_strategy, model_developement

# Load the per-minute price data from ./data relative to the working
# directory, then run the preparation and exploration steps in order.
df = pd.read_csv(os.path.join(os.getcwd(), "data", "GSK per min.csv"))

# --- Data Preparation ---
prepare = data_preparation.Preparation(data=df)
prepare.set_timestamps_as_index()
prepare.sort_by_timestamp()
prepare.drop_unused()
prepare.transform_to_float()
prepare.time_series_fillna()
prepare.plot_daily_prices(show=True)
prepare.plot_prices_and_projection(show=True)

# --- Data Exploration ---
explore = data_exploration.Exploration(data=prepare.data)
train_set, test_set = explore.split_train_test()
explore.distribution_comparison(train_set=train_set["Close"],
                                test_set=test_set["Close"],
                                show=True)
explore.plot_series_but_ignore_date(train_set=train_set["Close"],
                                    test_set=test_set["Close"],
                                    show=True)
train_report = explore.custom_stat_report(data=train_set, name="Train")
# The "Test" report must describe the test split, not the training split.
test_report = explore.custom_stat_report(data=test_set, name="Test")
explore.box_plots(data=train_set, show=True)
scaled_train_data = explore.scaler_min_max(data=train_set)
scaled_test_data = explore.scaler_min_max(data=test_set)
sliding_window_train_data, sliding_window_train_target = explore.turn_dfs_into_arrays(
    given_data=scaled_train_data)
sliding_window_test_data, sliding_window_test_target = explore.turn_dfs_into_arrays(
 def test_box_plots_for_dataframe_with_4_attributes(self):
     """Check that ``box_plots`` returns ``None`` for a four-column frame."""
     column_names = ["attr1", "attr2", "attr3", "attr4"]
     frame = pd.DataFrame(data=np.random.rand(10, 4), columns=column_names)
     explorer = data_exploration.Exploration(data=frame)
     result = explorer.box_plots(data=frame)
     self.assertIsNone(result)
 def test_color_list_for_output_colors(self):
     """Check two entries of the list returned by ``color_list``.

     Index 2 is expected to be ``"red"`` and index 5 ``"aqua"``.
     """
     frame = pd.DataFrame(data=np.random.rand(10), columns=["attr1"])
     colors = data_exploration.Exploration(data=frame).color_list()
     for position, expected_color in ((2, "red"), (5, "aqua")):
         self.assertEqual(colors[position], expected_color)