class DeepLearningTest(unittest.TestCase):
    PATH_DATASET_HOURLY = '../../../' + PATH_DATASET.get('hourly')
    PATH_DATASET_DAILY = '../../../' + PATH_DATASET.get('daily')

    df_hourly = load_dataframe_from_csv(PATH_DATASET_HOURLY)
    df_daily = load_dataframe_from_csv(PATH_DATASET_DAILY)

    def test_create_net(self):
        # TODO
        pass
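# A possible smoke test to complement the empty class above (a sketch, not the
# project's test suite). Since create_net's signature is not shown here, it
# exercises deep_learning_model(dataset) instead, assuming it returns a trained
# network plus a metrics dict, mirroring how the deep-learning runner consumes
# it; the class and method names below are illustrative.
class DeepLearningSmokeTest(unittest.TestCase):
    PATH_DATASET_DAILY = '../../../' + PATH_DATASET.get('daily')
    df_daily = load_dataframe_from_csv(PATH_DATASET_DAILY)

    def test_deep_learning_model_returns_net_and_metrics(self):
        net, metrics = deep_learning_model(self.df_daily)
        self.assertIsNotNone(net)
        self.assertIsInstance(metrics, dict)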
class ModelingUtilsTest(unittest.TestCase):
    PATH_DATASET_HOURLY = '../../../' + PATH_DATASET.get('hourly')
    PATH_DATASET_DAILY = '../../../' + PATH_DATASET.get('daily')

    df_hourly = load_dataframe_from_csv(PATH_DATASET_HOURLY)
    df_daily = load_dataframe_from_csv(PATH_DATASET_DAILY)

    def test_split_data(self):
        print(self.df_hourly.dtypes)
        train, test = split_data(self.df_hourly, test_size=TEST_SIZE)
        expected_size = 524
        self.assertEqual(expected_size, train.shape[0])
def runner(args: Namespace) -> None:
    df_bikes = load_dataframe_from_csv(
        create_path(args.home_path, PATH_BIKES_RAW))
    df_bikes = clean_bikes_data(df_bikes,
                                without_employees=True,
                                remove_outliers=True)
    save_dataframe(df_bikes, create_path(args.home_path, PATH_BIKES_CLEAN))
class AggregationOperationsTest(unittest.TestCase):
    PATH_BIKES_CLEAN = '../../../' + PATH_BIKES_CLEAN
    PATH_WEATHER = '../../../' + PATH_AEMET_PER_DAY

    df_bikes = load_dataframe_from_csv(PATH_BIKES_CLEAN,
                                       parse_dates=[COL_BIKES_DATE])
    df_weather = load_dataframe_from_json(PATH_WEATHER,
                                          parse_dates=[COL_WEATHER_DATE])

    def test_preprocess_rides_per_day(self):
        df_rides_per_day = preprocess_rides_per_day(self.df_bikes)
        expected_size = 28
        self.assertEqual(df_rides_per_day.shape[0], expected_size)
        self.assertIn(COL_BIKES_RIDES, df_rides_per_day.columns)

    def test_preprocess_rides_per_hour(self):
        df_rides_per_hour = preprocess_rides_per_hour(self.df_bikes)
        expected_size = 656
        self.assertIn(COL_BIKES_RIDES, df_rides_per_hour.columns)
        self.assertIn(COL_BIKES_HOUR, df_rides_per_hour.columns)
        self.assertEqual(df_rides_per_hour.shape[0], expected_size)

    def test_add_mean_rides_for_day(self):
        # TODO improve test: assert on the column added by add_mean_rides_for_day
        df_rides_per_day = preprocess_rides_per_day(self.df_bikes)
        df_with_mean = add_mean_rides_for_day(df_rides_per_day)
        self.assertEqual(0, 0)

    def test_add_weather_data_per_day(self):
        df_rides_per_day = preprocess_rides_per_day(self.df_bikes)
        df_with_weather = add_weather_data_per_day(df_rides_per_day,
                                                   self.df_weather)
        self.assertIn(COL_WEATHER_RAIN, df_with_weather.columns)
        self.assertIn(COL_WEATHER_TEMP_MEAN, df_with_weather.columns)
        self.assertIn(COL_WEATHER_WIND_MEAN, df_with_weather.columns)

    def test_prepare_daily_data(self):
        dataset = prepare_daily_data(self.df_bikes, self.df_weather)
        self.assertNotIn(COL_BIKES_DATE, dataset.columns)

    def test_get_temperature_model(self):
        # A line fitted through (10, 20) and (12, 24) has slope 2 and intercept 0.
        model = get_temperature_model(20, 24, 10, 12)
        print(model.predict(np.array(11.0).reshape(1, -1)))
        self.assertAlmostEqual(model.coef_[0], 2.0)
        self.assertAlmostEqual(model.intercept_, 0.0)

    def test_get_temperature_simple(self):
        expected_value = 20.0
        self.assertEqual(expected_value,
                         get_temperature_simple(7, 20, 15, 35, 6, 14))

    def test_get_hourly_weather(self):
        df_rides_per_hour = preprocess_rides_per_hour(self.df_bikes)
        df_with_weather = add_weather_data_per_day(df_rides_per_hour,
                                                   self.df_weather)
        df_with_weather_hourly = get_hourly_weather(df_with_weather)
        self.assertIn(COL_WEATHER_TEMP_HOURLY, df_with_weather_hourly.columns)
def runner(args: Namespace) -> None:
    dataset = load_dataframe_from_csv(
        create_path(args.home_path, PATH_DATASET.get(args.sampling_frequency)))

    xgb_model, metrics = xgboost_model(dataset, args.sampling_frequency)
    metrics = {
        metric_name: str(metric_value)
        for metric_name, metric_value in metrics.items()
    }

    # TODO implement saving the XGB model to
    # create_path(args.home_path, PATH_RESULTS[args.sampling_frequency]['xgboost']['model'])
    with open(
            create_path(
                args.home_path,
                PATH_RESULTS[args.sampling_frequency]['xgboost']['metrics']),
            'w') as metrics_file:
        # TODO refactor this as a function
        metrics_file.write(json.dumps(metrics))
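# Sketch for the model-saving TODO above: a model trained through XGBoost's
# sklearn wrapper can be persisted with its native save_model()/load_model()
# API. This is a self-contained illustration under that assumption, not the
# project's code; the file name and toy data are placeholders.
import numpy as np
import xgboost as xgb


def save_xgb_model(model: xgb.XGBRegressor, path: str) -> None:
    # XGBoost's native format stores the booster together with its parameters.
    model.save_model(path)


if __name__ == '__main__':
    X_toy = np.random.rand(32, 4)
    y_toy = np.random.rand(32)
    toy_model = xgb.XGBRegressor(n_estimators=10).fit(X_toy, y_toy)

    save_xgb_model(toy_model, 'xgboost_model.json')

    restored = xgb.XGBRegressor()
    restored.load_model('xgboost_model.json')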
class CleaningOperationsTest(unittest.TestCase):
    PATH_BIKES = '../../../' + PATH_BIKES_RAW
    PATH_WEATHER = '../../../' + PATH_AEMET_PER_DAY

    df_bikes = load_dataframe_from_csv(PATH_BIKES)
    df_weather = load_dataframe_from_json(PATH_WEATHER)

    def test_clean_stations(self):
        # TODO
        pass

    def test_transform_types(self):
        # TODO
        pass

    def test_remove_outliers_travel_time(self):
        # TODO improve test
        expected_shape = 0
        df_bikes = transform_types_bikes(self.df_bikes)
        upper_limit = df_bikes[COL_BIKES_TRAVEL_TIME].quantile(UPPER_QUANTILE)
        lower_limit = df_bikes[COL_BIKES_TRAVEL_TIME].quantile(LOWER_QUANTILE)
        df_bikes_clean = remove_outliers_travel_time(df_bikes)
        # No rows should remain outside the [lower_limit, upper_limit] range.
        self.assertEqual(
            df_bikes_clean[
                (df_bikes_clean[COL_BIKES_TRAVEL_TIME] > upper_limit) |
                (df_bikes_clean[COL_BIKES_TRAVEL_TIME] < lower_limit)].shape[0],
            expected_shape)

    def test_clean_date(self):
        # TODO improve test
        df_bikes = transform_types_bikes(self.df_bikes)
        df_bikes_clean = clean_date_bikes(df_bikes)
        self.assertIn(COL_BIKES_DAY_OF_WEEK, df_bikes_clean.columns)
        self.assertIn(COL_BIKES_DAY, df_bikes_clean.columns)
        self.assertIn(COL_BIKES_MONTH, df_bikes_clean.columns)

    def test_filter_out_employees(self):
        # TODO
        pass

    def test_clean_weather_data(self):
        df_weather_clean = clean_weather_data(self.df_weather)
        self.assertEqual(df_weather_clean[COL_WEATHER_RAIN].dtype, 'float32')
        self.assertEqual(df_weather_clean[COL_WEATHER_TEMP_MEAN].dtype,
                         'float32')
        self.assertEqual(df_weather_clean[COL_WEATHER_WIND_MEAN].dtype,
                         'float32')
def runner(args: Namespace) -> None:
    df_bikes_clean = load_dataframe_from_csv(
        create_path(args.home_path, PATH_BIKES_CLEAN),
        parse_dates=[COL_BIKES_DATE])
    df_weather = load_dataframe_from_json(
        create_path(args.home_path, PATH_AEMET_PER_DAY),
        parse_dates=[COL_WEATHER_DATE])
    df_weather = clean_weather_data(df_weather)

    # TODO extract the 'daily'/'hourly' values as constants
    if args.sampling_frequency == 'daily':
        df_prepared = prepare_daily_data(df_bikes_clean, df_weather)
    elif args.sampling_frequency == 'hourly':
        df_prepared = prepare_hourly_data(df_bikes_clean, df_weather)
    else:
        # Fail fast on an unsupported frequency instead of failing later
        # with an unbound df_prepared.
        raise ValueError(
            f'Unsupported sampling frequency: {args.sampling_frequency}')

    save_dataframe(
        df_prepared,
        create_path(args.home_path, PATH_DATASET.get(args.sampling_frequency)))
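# The inline note above suggests extracting the 'daily'/'hourly' literals into
# constants; a minimal sketch (the constant names are illustrative, not the
# project's):
SAMPLING_FREQUENCY_DAILY = 'daily'
SAMPLING_FREQUENCY_HOURLY = 'hourly'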
def runner(args: Namespace) -> None:
    dataset = load_dataframe_from_csv(
        create_path(args.home_path, PATH_DATASET.get(args.sampling_frequency)))

    net, metrics = deep_learning_model(dataset)
    metrics = {
        metric_name: str(metric_value)
        for metric_name, metric_value in metrics.items()
    }

    save_model(
        net,
        create_path(
            args.home_path,
            PATH_RESULTS[args.sampling_frequency]['deep-learning']['model']))

    with open(
            create_path(
                args.home_path,
                PATH_RESULTS[args.sampling_frequency]['deep-learning']['metrics']),
            'w') as metrics_file:
        # TODO refactor this as a function
        metrics_file.write(json.dumps(metrics))
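# Possible helper for the repeated "refactor this as a function" TODO in the
# runners above (a sketch; the function name is illustrative):
import json


def save_metrics(metrics: dict, path: str) -> None:
    # Write the (already stringified) metrics dictionary to disk as JSON.
    with open(path, 'w') as metrics_file:
        json.dump(metrics, metrics_file)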
class XGBoostTest(unittest.TestCase):
    PATH_DATASET_DAILY = '../../../' + PATH_DATASET.get('daily')
    df_daily = load_dataframe_from_csv(PATH_DATASET_DAILY)

    PATH_DATASET_HOURLY = '../../../' + PATH_DATASET.get('hourly')
    df_hourly = load_dataframe_from_csv(PATH_DATASET_HOURLY)
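# A possible smoke test to back the empty class above (a sketch; it assumes
# xgboost_model(df, frequency) returns a fitted estimator with a predict()
# method plus a metrics dict, as consumed by the xgboost runner; the class and
# method names below are illustrative).
class XGBoostSmokeTest(unittest.TestCase):
    PATH_DATASET_DAILY = '../../../' + PATH_DATASET.get('daily')
    df_daily = load_dataframe_from_csv(PATH_DATASET_DAILY)

    def test_xgboost_model_returns_model_and_metrics(self):
        model, metrics = xgboost_model(self.df_daily, 'daily')
        self.assertTrue(hasattr(model, 'predict'))
        self.assertIsInstance(metrics, dict)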