def test_load_predictions_from_cdc_csv_file(self):
    """Sanity-check that cdc csv predictions get converted and then loaded into the database."""
    project = Project.objects.create()
    make_cdc_units_and_targets(project)
    forecast_model = ForecastModel.objects.create(project=project, name='model', abbreviation='abbrev')
    time_zero = TimeZero.objects.create(project=project, timezero_date=datetime.date(2017, 1, 1))
    cdc_csv_path = Path('forecast_app/tests/EW1-KoTsarima-2017-01-17-small.csv')  # EW01 2017
    forecast = Forecast.objects.create(forecast_model=forecast_model, source=cdc_csv_path.name,
                                       time_zero=time_zero)
    # bug fix: open the same local file used for `source=` above. previously this opened
    # `self.cdc_csv_path`, so the forecast's source metadata and its actual data could come from two
    # different files
    with open(cdc_csv_path) as cdc_csv_fp:
        json_io_dict = json_io_dict_from_cdc_csv_file(2011, cdc_csv_fp)
        load_predictions_from_json_io_dict(forecast, json_io_dict, False)
    self.assertEqual(729, forecast.get_num_rows())
    self.assertEqual(722, forecast.bin_distribution_qs().count())  # 729 - 7
    self.assertEqual(0, forecast.named_distribution_qs().count())
    self.assertEqual(7, forecast.point_prediction_qs().count())
    self.assertEqual(0, forecast.sample_distribution_qs().count())
    self.assertEqual(0, forecast.quantile_prediction_qs().count())
def test_model_score_change_truths(self):
    """Loading or deleting project truth must bump every model's score_change.changed_at."""
    truth_project = Project.objects.create()
    make_cdc_units_and_targets(truth_project)
    truths_csv = Path('forecast_app/tests/truth_data/truths-ok.csv')

    # with no models yet, verify Project._update_model_score_changes() is still invoked
    with patch('forecast_app.models.Project._update_model_score_changes') as mocked_update:
        load_truth_data(truth_project, truths_csv)
        # invoked once each by delete_truth_data() and load_truth_data()
        self.assertEqual(2, mocked_update.call_count)

    # with one model: loading truth updates its score_change.changed_at
    model = ForecastModel.objects.create(project=truth_project, name='name', abbreviation='abbrev')
    prior_changed_at = model.score_change.changed_at
    load_truth_data(truth_project, truths_csv)
    model.score_change.refresh_from_db()
    self.assertNotEqual(prior_changed_at, model.score_change.changed_at)

    # deleting project truth updates it as well
    prior_changed_at = model.score_change.changed_at
    truth_project.delete_truth_data()
    model.score_change.refresh_from_db()
    self.assertNotEqual(prior_changed_at, model.score_change.changed_at)
def setUpTestData(cls):
    """Build a 'mini' season: one project/model with four ensemble forecasts, truth, and the MAE score."""
    cls.project = Project.objects.create()
    make_cdc_units_and_targets(cls.project)
    cls.forecast_model = ForecastModel.objects.create(project=cls.project, name='name',
                                                      abbreviation='abbrev')
    # one ((mmwr year, mmwr week), csv file name) pair per forecast, loaded in this order
    ensemble_dir = Path('forecast_app/tests/model_error/ensemble')
    week_and_file = [((2017, 1), 'EW1-KoTstable-2017-01-17.csv'),    # EW01 2017
                     ((2017, 2), 'EW2-KoTstable-2017-01-23.csv'),    # EW02 2017
                     ((2016, 51), 'EW51-KoTstable-2017-01-03.csv'),  # EW51 2016
                     ((2016, 52), 'EW52-KoTstable-2017-01-09.csv')]  # EW52 2016
    forecasts = []
    for (mmwr_year, mmwr_week), csv_name in week_and_file:
        time_zero = TimeZero.objects.create(project=cls.project,
                                            timezero_date=pymmwr.mmwr_week_to_date(mmwr_year, mmwr_week))
        forecasts.append(load_cdc_csv_forecast_file(2016, cls.forecast_model,
                                                    ensemble_dir / csv_name, time_zero))
    cls.forecast1, cls.forecast2, cls.forecast3, cls.forecast4 = forecasts

    # 'mini' season expected MAEs for testing. from:
    # model_error_calculations.txt -> model_error_calculations.py -> model_error_calculations.xlsx:
    cls.exp_target_to_mae = {'1 wk ahead': 0.215904853,
                             '2 wk ahead': 0.458186984,
                             '3 wk ahead': 0.950515864,
                             '4 wk ahead': 1.482010693}
    load_truth_data(cls.project, Path('forecast_app/tests/truth_data/mean-abs-error-truths.csv'))

    # score needed for MAE calculation
    Score.ensure_all_scores_exist()
    cls.score = Score.objects.filter(abbreviation='abs_error').first()  # hard-coded official abbrev
    cls.score.update_score_for_model(cls.forecast_model)
def fill_cdc_project(project, mo_user, is_public):
    """Populate `project` with CDC units/targets, two TimeZeros, ground truth, and two test models
    (the first of which gets a small forecast loaded into it)."""
    project.description = "description"
    project.home_url = "http://example.com/"
    project.core_data = "http://example.com/"

    # Units and Targets come from cdc-project.json (recall it has no timezeros)
    make_cdc_units_and_targets(project)

    # two TimeZeros - one for ground truth, and one for the forecast's data:
    # EW1-KoTsarima-2017-01-17-small.csv -> pymmwr.date_to_mmwr_week(datetime.date(2017, 1, 17))  # EW01 2017
    #   -> {'year': 2017, 'week': 3, 'day': 3}
    forecast_time_zero = TimeZero.objects.create(project=project,
                                                 timezero_date=datetime.date(2017, 1, 17),
                                                 data_version_date=None)
    TimeZero.objects.create(project=project, timezero_date=datetime.date(2017, 1, 24),
                            data_version_date=None)

    # ground truth
    load_truth_data(project, Path('forecast_app/tests/truth_data/2017-01-17-truths.csv'),
                    is_convert_na_none=True)

    # the two models
    click.echo("creating ForecastModel")
    model1 = ForecastModel.objects.create(
        project=project,
        name=f'Test ForecastModel1 ({"public" if is_public else "private"})',
        abbreviation='model1_abbrev',
        team_name='ForecastModel1 team',
        description="a ForecastModel for testing",
        home_url='http://example.com',
        owner=mo_user)

    # load a small forecast data file into model1
    small_csv_path = Path('forecast_app/tests/EW1-KoTsarima-2017-01-17-small.csv')  # EW01 2017
    click.echo(f"* loading forecast into forecast_model={model1}, csv_file_path={small_csv_path}")
    load_start = timeit.default_timer()
    loaded_forecast = load_cdc_csv_forecast_file(2016, model1, small_csv_path, forecast_time_zero)
    click.echo(f" loaded forecast={loaded_forecast}. {timeit.default_timer() - load_start}")

    # model2 gets no forecast data
    ForecastModel.objects.create(
        project=project,
        name=f'Test ForecastModel2 ({"public" if is_public else "private"})',
        abbreviation='model2_abbrev',
        # team_name='ForecastModel2 team',  # leave default ('')
        description="a second ForecastModel for testing",
        home_url='http://example.com',
        owner=mo_user)
def test_load_truth_data(self):
    """Exercise load_truth_data(): the happy path, then csv files with bad timezeros/units/targets."""
    load_truth_data(self.project, Path('forecast_app/tests/truth_data/truths-ok.csv'),
                    is_convert_na_none=True)
    self.assertEqual(5, truth_data_qs(self.project).count())
    self.assertTrue(is_truth_data_loaded(self.project))

    # each of these csv files references something missing from the Project (the TimeZero 2017-01-02,
    # a unit, or a target); the offending rows are skipped by _read_truth_data_rows(), so the remaining
    # loaded data is a subset of the previous data. that raised 'new data is a subset of previous' prior
    # to [support truth "diff" uploads #319](https://github.com/reichlab/forecast-repository/issues/319),
    # but subsets are now allowed
    truth_dir = Path('forecast_app/tests/truth_data')
    for bad_csv_name in ['truths-bad-timezero.csv',  # non-existent TimeZero in Project
                         'truths-bad-location.csv',  # non-existent unit in Project
                         'truths-bad-target.csv']:   # non-existent target in Project
        load_truth_data(self.project, truth_dir / bad_csv_name, bad_csv_name,
                        is_convert_na_none=True)

    # a fresh project with a matching TimeZero loads all five rows
    project2 = Project.objects.create()
    make_cdc_units_and_targets(project2)
    self.assertEqual(0, truth_data_qs(project2).count())
    self.assertFalse(is_truth_data_loaded(project2))

    TimeZero.objects.create(project=project2, timezero_date=datetime.date(2017, 1, 1))
    load_truth_data(project2, Path('forecast_app/tests/truth_data/truths-ok.csv'),
                    is_convert_na_none=True)
    self.assertEqual(5, truth_data_qs(project2).count())

    # test get_truth_data_preview()
    exp_truth_preview = [
        (datetime.date(2017, 1, 1), 'US National', '1 wk ahead', 0.73102),
        (datetime.date(2017, 1, 1), 'US National', '2 wk ahead', 0.688338),
        (datetime.date(2017, 1, 1), 'US National', '3 wk ahead', 0.732049),
        (datetime.date(2017, 1, 1), 'US National', '4 wk ahead', 0.911641),
        (datetime.date(2017, 1, 1), 'US National', 'Season onset', '2017-11-20')]
    self.assertEqual(sorted(exp_truth_preview), sorted(get_truth_data_preview(project2)))
def test_model_score_change_forecasts(self):
    """Adding or deleting forecasts (incl. bulk deletes) must bump the model's score_change.changed_at."""
    project2 = Project.objects.create()
    make_cdc_units_and_targets(project2)
    time_zero = TimeZero.objects.create(project=project2, timezero_date=datetime.date.today())

    # creating a new model sets its score_change.changed_at
    model = ForecastModel.objects.create(project=project2, name='name', abbreviation='abbrev')
    self.assertIsInstance(model.score_change.changed_at, datetime.datetime)

    small_csv = Path('forecast_app/tests/EW1-KoTsarima-2017-01-17-small.csv')  # EW01 2017

    # adding a forecast updates changed_at to a later time
    prior_changed_at = model.score_change.changed_at
    new_forecast = load_cdc_csv_forecast_file(2016, model, small_csv, time_zero)
    self.assertNotEqual(prior_changed_at, model.score_change.changed_at)
    self.assertLess(prior_changed_at, model.score_change.changed_at)  # was updated later

    # deleting a forecast updates changed_at too
    prior_changed_at = model.score_change.changed_at
    new_forecast.delete()
    self.assertNotEqual(prior_changed_at, model.score_change.changed_at)
    self.assertLess(prior_changed_at, model.score_change.changed_at)  # was updated later

    # bulk-deleting a model's forecasts will update its score_change.changed_at. (this basically tests
    # that a signal is used instead of a customized delete() - see set_model_changed_at() comment)
    for idx in range(2):
        forecast = load_cdc_csv_forecast_file(2016, model, small_csv, time_zero)
        forecast.issue_date += datetime.timedelta(days=idx + 1)  # newer version avoids unique constraint errors
        forecast.save()
    prior_changed_at = model.score_change.changed_at
    model.forecasts.all().delete()
    self.assertNotEqual(prior_changed_at, model.score_change.changed_at)
    self.assertLess(prior_changed_at, model.score_change.changed_at)  # was updated later
def test_load_forecast_created_at_field(self):
    """A freshly-loaded forecast must have its created_at field populated."""
    project2 = Project.objects.create()
    make_cdc_units_and_targets(project2)
    tz_today = TimeZero.objects.create(project=project2, timezero_date=datetime.date.today())
    model = ForecastModel.objects.create(project=project2, name='name', abbreviation='abbrev')
    small_csv = Path('forecast_app/tests/EW1-KoTsarima-2017-01-17-small.csv')  # EW01 2017
    loaded_forecast = load_cdc_csv_forecast_file(2016, model, small_csv, tz_today)
    self.assertIsNotNone(loaded_forecast.created_at)
def setUpTestData(cls):
    """Create a project with CDC units/targets, one TimeZero, one model, and one loaded forecast."""
    cls.project = Project.objects.create()
    cls.time_zero = TimeZero.objects.create(project=cls.project,
                                            timezero_date=datetime.date(2017, 1, 1))
    make_cdc_units_and_targets(cls.project)
    cls.forecast_model = ForecastModel.objects.create(project=cls.project, name='fm1',
                                                      abbreviation='abbrev')
    ensemble_csv = Path('forecast_app/tests/model_error/ensemble/EW1-KoTstable-2017-01-17.csv')  # EW01 2017
    cls.forecast = load_cdc_csv_forecast_file(2016, cls.forecast_model, ensemble_csv, cls.time_zero)
def setUpTestData(cls):
    """Create a project/model, load one forecast, and back-date its issued_at by a day
    (so later-loaded versions avoid unique constraint errors)."""
    cls.project = Project.objects.create()
    make_cdc_units_and_targets(cls.project)
    cls.forecast_model = ForecastModel.objects.create(project=cls.project, name='name',
                                                      abbreviation='abbrev')
    cls.time_zero = TimeZero.objects.create(project=cls.project,
                                            timezero_date=datetime.date(2017, 1, 1))
    ensemble_csv = Path('forecast_app/tests/model_error/ensemble/EW1-KoTstable-2017-01-17.csv')  # EW01 2017
    cls.forecast = load_cdc_csv_forecast_file(2016, cls.forecast_model, ensemble_csv, cls.time_zero)
    # older version avoids unique constraint errors
    cls.forecast.issued_at -= datetime.timedelta(days=1)
    cls.forecast.save()
def test_load_forecasts_from_dir(self):
    """load_cdc_csv_forecasts_from_dir() loads each new file in a dir and skips already-loaded ones."""
    project2 = Project.objects.create()
    make_cdc_units_and_targets(project2)
    # one TimeZero per cdc csv file to be loaded
    for tz_date in [datetime.date(2016, 10, 23),  # 20161023-KoTstable-20161109.cdc.csv
                    datetime.date(2016, 10, 30),  # 20161030-KoTstable-20161114.cdc.csv
                    datetime.date(2016, 11, 6)]:  # 20161106-KoTstable-20161121.cdc.csv
        TimeZero.objects.create(project=project2, timezero_date=tz_date, data_version_date=None)
    model = ForecastModel.objects.create(project=project2, name='name', abbreviation='abbrev')

    # copy the two files from 'forecast_app/tests/load_forecasts' to a temp dir, run the loader, and then
    # copy a third file over to test that it skips already-loaded ones
    with tempfile.TemporaryDirectory() as temp_dir_name:
        temp_dir = Path(temp_dir_name)
        test_file_dir = Path('forecast_app/tests/load_forecasts')
        for file_name in ['20161023-KoTstable-20161109.cdc.csv',
                          '20161030-KoTstable-20161114.cdc.csv']:
            shutil.copy(str(test_file_dir / file_name), str(temp_dir))

        loaded = load_cdc_csv_forecasts_from_dir(model, temp_dir, 2016)
        self.assertEqual(2, len(loaded))
        self.assertEqual(2, len(model.forecasts.all()))

        # copy third file and test only the new one is loaded
        shutil.copy(str(test_file_dir / 'third-file/20161106-KoTstable-20161121.cdc.csv'), str(temp_dir))
        loaded = load_cdc_csv_forecasts_from_dir(model, temp_dir, 2016)
        self.assertEqual(1, len(loaded))
def test_load_forecast(self):
    """Check loaded forecast metadata and point counts, then verify the three RuntimeError cases."""
    self.assertEqual(1, len(self.forecast_model.forecasts.all()))
    self.assertIsInstance(self.forecast, Forecast)
    self.assertEqual('EW1-KoTstable-2017-01-17.csv', self.forecast.source)
    self.assertEqual(11 * 7 * 2, self.forecast.pred_eles.count())  # locations * targets * points/bins

    # check 'US National' targets: spot-check a few point rows
    us_points_qs = self.forecast.pred_eles.filter(unit__name='US National',
                                                  pred_class=PredictionElement.POINT_CLASS)
    self.assertEqual(7, us_points_qs.count())

    # an empty file raises
    with self.assertRaises(RuntimeError) as context:
        bad_csv = Path('forecast_app/tests/EW1-bad_file_no_header-2017-01-17.csv')  # EW01 2017?
        load_cdc_csv_forecast_file(2016, self.forecast_model, bad_csv, self.time_zero)
    self.assertIn('empty file', str(context.exception))

    # a bad data file header raises
    with self.assertRaises(RuntimeError) as context:
        bad_csv = Path('forecast_app/tests/EW1-bad_file_header-2017-01-17.csv')  # EW01 2017?
        load_cdc_csv_forecast_file(2016, self.forecast_model, bad_csv, self.time_zero)
    self.assertIn('invalid header', str(context.exception))

    # load_forecast() with a timezero not in the project raises
    project2 = Project.objects.create()  # no TimeZeros
    make_cdc_units_and_targets(project2)
    model2 = ForecastModel.objects.create(project=project2, name='name', abbreviation='abbrev')
    with self.assertRaises(RuntimeError) as context:
        ensemble_csv = Path('forecast_app/tests/model_error/ensemble/EW1-KoTstable-2017-01-17.csv')  # EW01 2017
        load_cdc_csv_forecast_file(2016, model2, ensemble_csv, self.time_zero)
    self.assertIn("time_zero was not in project", str(context.exception))
def setUpTestData(cls):
    """Create a project/model and load four ensemble forecasts, one per MMWR week."""
    cls.project = Project.objects.create()
    make_cdc_units_and_targets(cls.project)
    cls.forecast_model = ForecastModel.objects.create(project=cls.project, name='name',
                                                      abbreviation='abbrev')
    # one ((mmwr year, mmwr week), csv file name) pair per forecast, loaded in this order
    ensemble_dir = Path('forecast_app/tests/model_error/ensemble')
    week_and_file = [((2017, 1), 'EW1-KoTstable-2017-01-17.csv'),    # EW01 2017
                     ((2017, 2), 'EW2-KoTstable-2017-01-23.csv'),    # EW02 2017
                     ((2016, 51), 'EW51-KoTstable-2017-01-03.csv'),  # EW51 2016
                     ((2016, 52), 'EW52-KoTstable-2017-01-09.csv')]  # EW52 2016
    forecasts = []
    for (mmwr_year, mmwr_week), csv_name in week_and_file:
        time_zero = TimeZero.objects.create(project=cls.project,
                                            timezero_date=pymmwr.mmwr_week_to_date(mmwr_year, mmwr_week))
        forecasts.append(load_cdc_csv_forecast_file(2016, cls.forecast_model,
                                                    ensemble_dir / csv_name, time_zero))
    cls.forecast1, cls.forecast2, cls.forecast3, cls.forecast4 = forecasts
def test_d3_foresight_larger(self):
    """Compare flusight data for two models against the expected JSON (rendered as a Django template
    because model IDs can vary depending on the RDBMS)."""
    project = Project.objects.create()
    make_cdc_units_and_targets(project)
    # one TimeZero per cdc csv file to be loaded
    for tz_date in [datetime.date(2016, 10, 23),  # 20161023-KoTstable-20161109.cdc.csv {'year': 2016, 'week': 43, 'day': 1}
                    datetime.date(2016, 10, 30),  # 20161030-KoTstable-20161114.cdc.csv {'year': 2016, 'week': 44, 'day': 1}
                    datetime.date(2016, 11, 6)]:  # 20161106-KoTstable-20161121.cdc.csv {'year': 2016, 'week': 45, 'day': 1}
        TimeZero.objects.create(project=project, timezero_date=tz_date, data_version_date=None)
    model1 = ForecastModel.objects.create(project=project, name='forecast_model1', abbreviation='model1')
    model2 = ForecastModel.objects.create(project=project, name='forecast_model2', abbreviation='model2')
    forecast_dir = Path('forecast_app/tests/load_forecasts')
    load_cdc_csv_forecasts_from_dir(model1, forecast_dir, 2016)
    load_cdc_csv_forecasts_from_dir(model2, forecast_dir / 'third-file', 2016)

    with open('forecast_app/tests/EW1-KoTsarima-2017-01-17-small-exp-flusight-data.json', 'r') as fp:
        exp_json_template = Template(fp.read())
    exp_json_str = exp_json_template.render(Context({'forecast_model1_id': model1.id,
                                                     'forecast_model2_id': model2.id}))
    exp_flusight_data_dict = json.loads(exp_json_str)
    act_flusight_data_dict = flusight_unit_to_data_dict(project, None)
    self.assertEqual(exp_flusight_data_dict, act_flusight_data_dict)
def test_d3_foresight_out_of_season(self):
    """Flusight data for a season ('2017') that has no forecast data."""
    project = Project.objects.create()
    make_cdc_units_and_targets(project)
    # pymmwr.mmwr_week_to_date(2016, 29) -> datetime.date(2016, 7, 17):
    time_zero = TimeZero.objects.create(project=project,
                                        timezero_date=datetime.date(2016, 7, 17),  # 29 < SEASON_START_EW_NUMBER
                                        data_version_date=None,
                                        is_season_start=True, season_name='2016')
    # 20161030-KoTstable-20161114.cdc.csv {'year': 2016, 'week': 44, 'day': 1} -> datetime.date(2016, 10, 30):
    TimeZero.objects.create(project=project, timezero_date=datetime.date(2016, 10, 30),
                            data_version_date=None,
                            is_season_start=True, season_name='2017')  # season has no forecast data
    model = ForecastModel.objects.create(project=project, name='forecast_model1 name',
                                         abbreviation='abbrev')
    small_csv = Path('forecast_app/tests/EW1-KoTsarima-2017-01-17-small.csv')  # EW01 2017
    load_cdc_csv_forecast_file(2016, model, small_csv, time_zero)

    with open('forecast_app/tests/EW1-KoTsarima-2017-01-17-small-exp-flusight-no-points.json', 'r') as fp:
        exp_json_template = Template(fp.read())
    exp_json_str = exp_json_template.render(Context({'forecast_model_id': model.id}))
    exp_flusight_data_dict = json.loads(exp_json_str)
    act_flusight_data_dict = flusight_unit_to_data_dict(project, '2017')
    self.assertEqual(exp_flusight_data_dict, act_flusight_data_dict)
def test_load_predictions_from_cdc_csv_file(self):
    """Sanity-check that the predictions get converted and then loaded into the database."""
    project = Project.objects.create()
    make_cdc_units_and_targets(project)
    model = ForecastModel.objects.create(project=project, name='model', abbreviation='abbrev')
    time_zero = TimeZero.objects.create(project=project, timezero_date=datetime.date(2017, 1, 1))
    forecast = Forecast.objects.create(forecast_model=model, time_zero=time_zero)
    with open(self.cdc_csv_path) as cdc_csv_fp:
        json_io_dict = json_io_dict_from_cdc_csv_file(2011, cdc_csv_fp)
        load_predictions_from_json_io_dict(forecast, json_io_dict, is_validate_cats=False)
    self.assertEqual(1 * 7 * 2, forecast.pred_eles.count())  # locations * targets * points/bins
def test_d3_foresight(self):
    """Compare flusight data for one model against an expected JSON template."""
    project = Project.objects.create()
    make_cdc_units_and_targets(project)
    # 20161023-KoTstable-20161109.cdc.csv {'year': 2016, 'week': 43, 'day': 1}
    time_zero = TimeZero.objects.create(project=project,
                                        timezero_date=datetime.date(2016, 10, 23),
                                        data_version_date=datetime.date(2016, 10, 22))  # -> outputs dataVersionTime
    # 20161030-KoTstable-20161114.cdc.csv {'year': 2016, 'week': 44, 'day': 1}
    TimeZero.objects.create(project=project, timezero_date=datetime.date(2016, 10, 30),
                            data_version_date=datetime.date(2016, 10, 29))
    model1 = ForecastModel.objects.create(project=project, name='forecast_model1 name',
                                          abbreviation='abbrev')
    small_csv = Path('forecast_app/tests/EW1-KoTsarima-2017-01-17-small.csv')  # EW01 2017
    load_cdc_csv_forecast_file(2016, model1, small_csv, time_zero)

    # we treat the json file as a Django's template b/c model IDs are hard-coded, but can vary depending
    # on the RDBMS
    with open('forecast_app/tests/EW1-KoTsarima-2017-01-17-small-exp-flusight.json', 'r') as fp:
        exp_json_template = Template(fp.read())
    exp_json_str = exp_json_template.render(Context({'forecast_model_id': model1.id}))
    exp_flusight_data_dict = json.loads(exp_json_str)
    act_flusight_data_dict = flusight_unit_to_data_dict(project, None)
    self.assertEqual(exp_flusight_data_dict, act_flusight_data_dict)
def test__tz_unit_targ_pks_to_truth_values(self):
    """Check _tz_unit_targ_pks_to_truth_values() against a hand-built expected dict of truth values,
    keyed {timezero_pk: {unit_pk: {target_pk: [truth values]}}}."""
    # setup
    project = Project.objects.create()
    make_cdc_units_and_targets(project)
    # load truth only for the TimeZero in truths-2016-2017-reichlab.csv we're testing against
    time_zero = TimeZero.objects.create(project=project, timezero_date=datetime.date(2017, 1, 1),
                                        is_season_start=True, season_name='season1')
    load_truth_data(project, Path('utils/ensemble-truth-table-script/truths-2016-2017-reichlab.csv'))
    forecast_model = ForecastModel.objects.create(project=project, name='test model',
                                                  abbreviation='abbrev')
    csv_file_path = Path('forecast_app/tests/EW1-KoTsarima-2017-01-17-small.csv')  # EW01 2017
    load_cdc_csv_forecast_file(2016, forecast_model, csv_file_path, time_zero)

    # test: collect the pks the expected dict is keyed by
    tz_pk = time_zero.pk
    # unit pks: the ten HHS regions plus national
    loc1_pk = Unit.objects.filter(project=project, name='HHS Region 1').first().pk
    loc2_pk = Unit.objects.filter(project=project, name='HHS Region 2').first().pk
    loc3_pk = Unit.objects.filter(project=project, name='HHS Region 3').first().pk
    loc4_pk = Unit.objects.filter(project=project, name='HHS Region 4').first().pk
    loc5_pk = Unit.objects.filter(project=project, name='HHS Region 5').first().pk
    loc6_pk = Unit.objects.filter(project=project, name='HHS Region 6').first().pk
    loc7_pk = Unit.objects.filter(project=project, name='HHS Region 7').first().pk
    loc8_pk = Unit.objects.filter(project=project, name='HHS Region 8').first().pk
    loc9_pk = Unit.objects.filter(project=project, name='HHS Region 9').first().pk
    loc10_pk = Unit.objects.filter(project=project, name='HHS Region 10').first().pk
    loc11_pk = Unit.objects.filter(project=project, name='US National').first().pk
    # target pks: seasonal targets plus the four week-ahead targets
    target1_pk = Target.objects.filter(project=project, name='Season onset').first().pk
    target2_pk = Target.objects.filter(project=project, name='Season peak week').first().pk
    target3_pk = Target.objects.filter(project=project, name='Season peak percentage').first().pk
    target4_pk = Target.objects.filter(project=project, name='1 wk ahead').first().pk
    target5_pk = Target.objects.filter(project=project, name='2 wk ahead').first().pk
    target6_pk = Target.objects.filter(project=project, name='3 wk ahead').first().pk
    target7_pk = Target.objects.filter(project=project, name='4 wk ahead').first().pk
    exp_dict = {  # {timezero_pk: {unit_pk: {target_id: truth_value}}}
        tz_pk: {
            loc1_pk: {target1_pk: ['2016-12-25'], target2_pk: [datetime.date(2017, 2, 5)],
                      target3_pk: [3.19221], target4_pk: [1.52411], target5_pk: [1.73987],
                      target6_pk: [2.06524], target7_pk: [2.51375]},
            loc2_pk: {target1_pk: ['2016-11-20'], target2_pk: [datetime.date(2017, 2, 5)],
                      target3_pk: [6.93759], target4_pk: [5.07086], target5_pk: [5.68166],
                      target6_pk: [6.01053], target7_pk: [6.49829]},
            loc3_pk: {target1_pk: ['2016-12-18'], target2_pk: [datetime.date(2017, 2, 12)],
                      target3_pk: [5.20003], target4_pk: [2.81366], target5_pk: [3.09968],
                      target6_pk: [3.45232], target7_pk: [3.73339]},
            loc4_pk: {target1_pk: ['2016-11-13'], target2_pk: [datetime.date(2017, 2, 12)],
                      target3_pk: [5.5107], target4_pk: [2.89395], target5_pk: [3.68564],
                      target6_pk: [3.69188], target7_pk: [4.53169]},
            loc5_pk: {target1_pk: ['2016-12-25'], target2_pk: [datetime.date(2017, 2, 12)],
                      target3_pk: [4.31787], target4_pk: [2.11757], target5_pk: [2.4432],
                      target6_pk: [2.76295], target7_pk: [3.182]},
            loc6_pk: {target1_pk: ['2017-01-08'], target2_pk: [datetime.date(2017, 2, 5)],
                      target3_pk: [9.87589], target4_pk: [4.80185], target5_pk: [5.26955],
                      target6_pk: [6.10427], target7_pk: [8.13221]},
            loc7_pk: {target1_pk: ['2016-12-25'], target2_pk: [datetime.date(2017, 2, 5)],
                      target3_pk: [6.35948], target4_pk: [2.75581], target5_pk: [3.46528],
                      target6_pk: [4.56991], target7_pk: [5.52653]},
            loc8_pk: {target1_pk: ['2016-12-18'], target2_pk: [datetime.date(2017, 2, 12)],
                      target3_pk: [2.72703], target4_pk: [1.90851], target5_pk: [2.2668],
                      target6_pk: [2.07104], target7_pk: [2.27632]},
            loc9_pk: {target1_pk: ['2016-12-18'], target2_pk: [datetime.date(2016, 12, 25)],
                      target3_pk: [3.30484], target4_pk: [2.83778], target5_pk: [2.68071],
                      target6_pk: [2.9577], target7_pk: [3.03987]},
            loc10_pk: {target1_pk: ['2016-12-11'], target2_pk: [datetime.date(2016, 12, 25)],
                       target3_pk: [3.67061], target4_pk: [2.15197], target5_pk: [3.25108],
                       target6_pk: [2.51434], target7_pk: [2.28634]},
            loc11_pk: {target1_pk: ['2016-12-11'], target2_pk: [datetime.date(2017, 2, 5)],
                       target3_pk: [5.06094], target4_pk: [3.07623], target5_pk: [3.50708],
                       target6_pk: [3.79872], target7_pk: [4.43601]}
        }
    }
    act_dict = _tz_unit_targ_pks_to_truth_values(forecast_model.project)
    self.assertEqual(exp_dict, act_dict)
def test_load_forecast(self):
    """Check the loaded forecast's metadata, spot-check 'US National' point values, then verify the
    three RuntimeError cases (empty file, bad header, timezero not in project)."""
    self.assertEqual(1, len(self.forecast_model.forecasts.all()))
    self.assertIsInstance(self.forecast, Forecast)
    self.assertEqual('EW1-KoTstable-2017-01-17.csv', self.forecast.source)
    self.assertEqual(8019, self.forecast.get_num_rows())  # excluding header

    # check 'US National' targets: spot-check a few point rows.
    # tuple columns: (unit name, target name, value_i, value_f, value_t, value_d, value_b)
    exp_points = [
        ('US National', '1 wk ahead', None, 3.00101461253164, None, None, None),  # _i, _f, _t, _d, _b
        ('US National', '2 wk ahead', None, 2.72809349594878, None, None, None),
        ('US National', '3 wk ahead', None, 2.5332588357381, None, None, None),
        ('US National', '4 wk ahead', None, 2.42985946508278, None, None, None),
        ('US National', 'Season onset', None, None, '2016-12-12', None, None),
        ('US National', 'Season peak percentage', None, 3.30854920241938, None, None, None),
        ('US National', 'Season peak week', None, None, None, datetime.date(2017, 1, 30), None)]
    act_points_qs = self.forecast.point_prediction_qs() \
        .filter(unit__name='US National') \
        .order_by('unit__name', 'target__name') \
        .values_list('unit__name', 'target__name', 'value_i', 'value_f', 'value_t', 'value_d',
                     'value_b')
    self.assertEqual(exp_points, list(act_points_qs))

    # test empty file
    with self.assertRaises(RuntimeError) as context:
        csv_file_path = Path('forecast_app/tests/EW1-bad_file_no_header-2017-01-17.csv')  # EW01 2017?
        load_cdc_csv_forecast_file(2016, self.forecast_model, csv_file_path, self.time_zero)
    self.assertIn('empty file', str(context.exception))

    # test a bad data file header
    with self.assertRaises(RuntimeError) as context:
        csv_file_path = Path('forecast_app/tests/EW1-bad_file_header-2017-01-17.csv')  # EW01 2017?
        load_cdc_csv_forecast_file(2016, self.forecast_model, csv_file_path, self.time_zero)
    self.assertIn('invalid header', str(context.exception))

    # test load_forecast() with timezero not in the project
    project2 = Project.objects.create()  # no TimeZeros
    make_cdc_units_and_targets(project2)
    forecast_model2 = ForecastModel.objects.create(project=project2, name='name',
                                                   abbreviation='abbrev')
    with self.assertRaises(RuntimeError) as context:
        csv_file_path = Path('forecast_app/tests/model_error/ensemble/EW1-KoTstable-2017-01-17.csv')  # EW01 2017
        load_cdc_csv_forecast_file(2016, forecast_model2, csv_file_path, self.time_zero)
    self.assertIn("time_zero was not in project", str(context.exception))
def test_load_truth_data_other_files(self):
    """Test truth files that used to be in yyyymmdd or yyyyww (EW) formats."""
    # truths-ok.csv (2017-01-17-truths.csv would basically test the same)
    load_truth_data(self.project, Path('forecast_app/tests/truth_data/truths-ok.csv'),
                    is_convert_na_none=True)
    # expected (timezero_date, unit name, target name, truth value) rows
    exp_rows = [(datetime.date(2017, 1, 1), 'US National', '1 wk ahead', 0.73102),
                (datetime.date(2017, 1, 1), 'US National', '2 wk ahead', 0.688338),
                (datetime.date(2017, 1, 1), 'US National', '3 wk ahead', 0.732049),
                (datetime.date(2017, 1, 1), 'US National', '4 wk ahead', 0.911641),
                (datetime.date(2017, 1, 1), 'US National', 'Season onset', '2017-11-20')]
    # note: https://code.djangoproject.com/ticket/32483 sqlite3 json query bug -> we manually access
    # field instead of using 'data__value'
    pred_data_qs = PredictionData.objects \
        .filter(pred_ele__forecast__forecast_model=oracle_model_for_project(self.project)) \
        .values_list('pred_ele__forecast__time_zero__timezero_date', 'pred_ele__unit__name',
                     'pred_ele__target__name', 'data')
    act_rows = [(tz_date, unit__name, target__name, data['value'])
                for tz_date, unit__name, target__name, data in pred_data_qs]
    self.assertEqual(sorted(exp_rows), sorted(list(act_rows)))

    # truths-2016-2017-reichlab-small.csv
    project2 = Project.objects.create()
    TimeZero.objects.create(project=project2, timezero_date=datetime.date(2016, 10, 30))
    make_cdc_units_and_targets(project2)
    load_truth_data(project2,
                    Path('forecast_app/tests/truth_data/truths-2016-2017-reichlab-small.csv'),
                    is_convert_na_none=True)
    exp_rows = [(datetime.date(2016, 10, 30), 'US National', '1 wk ahead', 1.55838),
                (datetime.date(2016, 10, 30), 'US National', '2 wk ahead', 1.64639),
                (datetime.date(2016, 10, 30), 'US National', '3 wk ahead', 1.91196),
                (datetime.date(2016, 10, 30), 'US National', '4 wk ahead', 1.81129),
                (datetime.date(2016, 10, 30), 'US National', 'Season onset', '2016-12-11'),
                (datetime.date(2016, 10, 30), 'US National', 'Season peak percentage', 5.06094),
                (datetime.date(2016, 10, 30), 'US National', 'Season peak week', '2017-02-05')]
    # note: https://code.djangoproject.com/ticket/32483 sqlite3 json query bug -> we manually access
    # field instead of using 'data__value'
    pred_data_qs = PredictionData.objects \
        .filter(pred_ele__forecast__forecast_model=oracle_model_for_project(project2)) \
        .values_list('pred_ele__forecast__time_zero__timezero_date', 'pred_ele__unit__name',
                     'pred_ele__target__name', 'data')
    act_rows = [(tz_date, unit__name, target__name, data['value'])
                for tz_date, unit__name, target__name, data in pred_data_qs]
    self.assertEqual(sorted(exp_rows), sorted(list(act_rows)))
def test_mae(self):
    """Spot-check mean absolute error values for one model, then exercise
    unit_to_mean_abs_error_rows_for_project() on the same fixture."""
    project2 = Project.objects.create()
    make_cdc_units_and_targets(project2)
    TimeZero.objects.create(project=project2, timezero_date=datetime.date(2016, 10, 23),
                            is_season_start=True, season_name='s1')
    TimeZero.objects.create(project=project2, timezero_date=datetime.date(2016, 10, 30))
    TimeZero.objects.create(project=project2, timezero_date=datetime.date(2016, 11, 6))
    forecast_model2 = ForecastModel.objects.create(project=project2, name='name',
                                                   abbreviation='abbrev')
    load_cdc_csv_forecasts_from_dir(forecast_model2, Path('forecast_app/tests/load_forecasts'), 2016)
    load_truth_data(project2,
                    Path('utils/ensemble-truth-table-script/truths-2016-2017-reichlab.csv'))

    # score needed for MAE calculation
    Score.ensure_all_scores_exist()
    score = Score.objects.filter(abbreviation='abs_error').first()  # hard-coded official abbrev
    score.update_score_for_model(forecast_model2)
    score_value_rows_for_season = _score_value_rows_for_season(project2, 's1')
    self.assertEqual(5 * 11, len(score_value_rows_for_season))  # 5 targets * 11 units

    # spot-check a unit
    exp_maes = [0.1830079332082548, 0.127335480231265, 0.040631614561185525, 0.09119562794624952,
                0.15125133156909953]
    hhs1_loc = project2.units.filter(name='HHS Region 1').first()
    hhs1_loc_rows = filter(lambda row: row[0] == hhs1_loc.id, score_value_rows_for_season)
    act_maes = [row[-1] for row in hhs1_loc_rows]
    for exp_mae, act_mae in zip(exp_maes, act_maes):
        self.assertAlmostEqual(exp_mae, act_mae)

    # test unit_to_mean_abs_error_rows_for_project(), since we have a nice fixture
    loc_to_mae_rows_no_season = unit_to_mean_abs_error_rows_for_project(project2, None)
    self.assertEqual(loc_to_mae_rows_no_season,
                     unit_to_mean_abs_error_rows_for_project(project2, 's1'))  # season_name shouldn't matter
    self.assertEqual(set(project2.units.values_list('name', flat=True)),
                     set(loc_to_mae_rows_no_season))

    # header row plus one row of per-target MAEs for the model
    exp_rows = [['Model', '1 wk ahead', '2 wk ahead', '3 wk ahead', '4 wk ahead',
                 'Season peak percentage'],
                [forecast_model2.pk, 0.127335480231265, 0.040631614561185525, 0.09119562794624952,
                 0.15125133156909953, 0.1830079332082548]]
    act_rows = loc_to_mae_rows_no_season[hhs1_loc.name][0]
    self.assertEqual(exp_rows[0], act_rows[0])  # header
    self.assertEqual(exp_rows[1][0], act_rows[1][0])  # model
    self.assertAlmostEqual(exp_rows[1][1], act_rows[1][1])  # 1 wk ahead
    self.assertAlmostEqual(exp_rows[1][2], act_rows[1][2])
    self.assertAlmostEqual(exp_rows[1][3], act_rows[1][3])
    self.assertAlmostEqual(exp_rows[1][4], act_rows[1][4])
    self.assertAlmostEqual(exp_rows[1][5], act_rows[1][5])

    # the second element of each unit's entry maps target -> minimum MAE
    target_spp = project2.targets.filter(name='Season peak percentage').first()
    target_1wk = project2.targets.filter(name='1 wk ahead').first()
    target_2wk = project2.targets.filter(name='2 wk ahead').first()
    target_3wk = project2.targets.filter(name='3 wk ahead').first()
    target_4wk = project2.targets.filter(name='4 wk ahead').first()
    exp_loc_to_min = {target_spp: 0.1830079332082548,
                      target_1wk: 0.127335480231265,
                      target_2wk: 0.040631614561185525,
                      target_3wk: 0.09119562794624952,
                      target_4wk: 0.15125133156909953}
    act_loc_to_min = loc_to_mae_rows_no_season[hhs1_loc.name][1]
    self.assertAlmostEqual(exp_loc_to_min[target_spp], act_loc_to_min[target_spp.name])
    self.assertAlmostEqual(exp_loc_to_min[target_1wk], act_loc_to_min[target_1wk.name])
    self.assertAlmostEqual(exp_loc_to_min[target_2wk], act_loc_to_min[target_2wk.name])
    self.assertAlmostEqual(exp_loc_to_min[target_3wk], act_loc_to_min[target_3wk.name])
    self.assertAlmostEqual(exp_loc_to_min[target_4wk], act_loc_to_min[target_4wk.name])