def test_load_predictions_from_cdc_csv_file(self):
        # sanity check: a CDC CSV file is converted to a json_io_dict, which is then loaded into the database as the
        # predictions of a newly-created Forecast
        project = Project.objects.create()
        make_cdc_units_and_targets(project)

        model = ForecastModel.objects.create(project=project, name='model', abbreviation='abbrev')
        timezero = TimeZero.objects.create(project=project, timezero_date=datetime.date(2017, 1, 1))
        source_path = Path('forecast_app/tests/EW1-KoTsarima-2017-01-17-small.csv')  # EW01 2017
        new_forecast = Forecast.objects.create(forecast_model=model, source=source_path.name, time_zero=timezero)

        # NOTE(review): the file read below is self.cdc_csv_path, whereas the Forecast's source name above comes from
        # the local source_path - confirm that both refer to the same file
        with open(self.cdc_csv_path) as csv_fp:
            predictions_dict = json_io_dict_from_cdc_csv_file(2011, csv_fp)
            load_predictions_from_json_io_dict(new_forecast, predictions_dict, False)

        # per-type row counts: all rows are either bins or points, so bins = 729 total - 7 points
        self.assertEqual(729, new_forecast.get_num_rows())
        self.assertEqual(722, new_forecast.bin_distribution_qs().count())
        self.assertEqual(0, new_forecast.named_distribution_qs().count())
        self.assertEqual(7, new_forecast.point_prediction_qs().count())
        self.assertEqual(0, new_forecast.sample_distribution_qs().count())
        self.assertEqual(0, new_forecast.quantile_prediction_qs().count())
Exemplo n.º 2
0
    def test_model_score_change_truths(self):
        # loading or deleting a project's truth data should update score_change.changed_at for all of the project's
        # models
        project2 = Project.objects.create()
        make_cdc_units_and_targets(project2)
        truths_ok_path = Path('forecast_app/tests/truth_data/truths-ok.csv')

        # case: project has no models -> just ensure that Project._update_model_score_changes() is called
        with patch('forecast_app.models.Project._update_model_score_changes') as update_mock:
            load_truth_data(project2, truths_ok_path)
            # called once each: delete_truth_data(), load_truth_data()
            self.assertEqual(2, update_mock.call_count)

        # case: project has one model -> loading truth changes the model's changed_at
        forecast_model2 = ForecastModel.objects.create(project=project2, name='name', abbreviation='abbrev')
        prev_changed_at = forecast_model2.score_change.changed_at
        load_truth_data(project2, truths_ok_path)
        forecast_model2.score_change.refresh_from_db()
        self.assertNotEqual(prev_changed_at, forecast_model2.score_change.changed_at)

        # deleting the project's truth changes the model's changed_at as well
        prev_changed_at = forecast_model2.score_change.changed_at
        project2.delete_truth_data()
        forecast_model2.score_change.refresh_from_db()
        self.assertNotEqual(prev_changed_at, forecast_model2.score_change.changed_at)
Exemplo n.º 3
0
    def setUpTestData(cls):
        """
        Creates a project with CDC units and targets, one model, and four forecasts loaded from CDC CSV files (each
        with its own TimeZero), then loads truth data and updates the 'abs_error' score for the model.
        """
        cls.project = Project.objects.create()
        make_cdc_units_and_targets(cls.project)
        cls.forecast_model = ForecastModel.objects.create(project=cls.project, name='name', abbreviation='abbrev')

        # one row per forecast: (class attribute to set, MMWR year, MMWR week, CDC CSV file)
        forecast_specs = [
            ('forecast1', 2017, 1,
             'forecast_app/tests/model_error/ensemble/EW1-KoTstable-2017-01-17.csv'),  # EW01 2017
            ('forecast2', 2017, 2,
             'forecast_app/tests/model_error/ensemble/EW2-KoTstable-2017-01-23.csv'),  # EW02 2017
            ('forecast3', 2016, 51,
             'forecast_app/tests/model_error/ensemble/EW51-KoTstable-2017-01-03.csv'),  # EW51 2016
            ('forecast4', 2016, 52,
             'forecast_app/tests/model_error/ensemble/EW52-KoTstable-2017-01-09.csv'),  # EW52 2016
        ]
        for attr_name, ew_year, ew_week, csv_file in forecast_specs:
            time_zero = TimeZero.objects.create(project=cls.project,
                                                timezero_date=pymmwr.mmwr_week_to_date(ew_year, ew_week))
            loaded_forecast = load_cdc_csv_forecast_file(2016, cls.forecast_model, Path(csv_file), time_zero)
            setattr(cls, attr_name, loaded_forecast)

        # 'mini' season for testing. from:
        #   model_error_calculations.txt -> model_error_calculations.py -> model_error_calculations.xlsx:
        cls.exp_target_to_mae = {
            '1 wk ahead': 0.215904853,
            '2 wk ahead': 0.458186984,
            '3 wk ahead': 0.950515864,
            '4 wk ahead': 1.482010693,
        }
        load_truth_data(cls.project, Path('forecast_app/tests/truth_data/mean-abs-error-truths.csv'))

        # the score is needed for the MAE calculation
        Score.ensure_all_scores_exist()
        cls.score = Score.objects.filter(abbreviation='abs_error').first()  # hard-coded official abbrev
        cls.score.update_score_for_model(cls.forecast_model)
Exemplo n.º 4
0
def fill_cdc_project(project, mo_user, is_public):
    """
    Fills `project` with CDC units and targets, two TimeZeros, ground truth, and two ForecastModels. The first model
    gets one forecast loaded from a small CDC CSV file; the second model is left without forecasts.

    :param project: the Project to fill
    :param mo_user: passed as the `owner` of both ForecastModels
    :param is_public: only used here to label the models' names as "public" or "private"
    """
    # NOTE(review): these attributes are assigned but this function does not itself call project.save() - confirm
    # that they get persisted elsewhere
    project.description = "description"
    project.home_url = "http://example.com/"
    project.core_data = "http://example.com/"

    # make the Units and Targets via cdc-project.json (recall it has no timezeros)
    make_cdc_units_and_targets(project)

    # make two TimeZeros - one for ground truth, and one for the forecast's data:
    # EW1-KoTsarima-2017-01-17-small.csv -> pymmwr.date_to_mmwr_week(datetime.date(2017, 1, 17))  # EW01 2017
    #   -> {'year': 2017, 'week': 3, 'day': 3}
    time_zero1 = TimeZero.objects.create(project=project, timezero_date=datetime.date(2017, 1, 17),
                                         data_version_date=None)
    TimeZero.objects.create(project=project, timezero_date=datetime.date(2017, 1, 24), data_version_date=None)

    # load ground truth
    truth_path = Path('forecast_app/tests/truth_data/2017-01-17-truths.csv')
    load_truth_data(project, truth_path, is_convert_na_none=True)

    # create the first of the two models
    click.echo("creating ForecastModel")
    forecast_model1 = ForecastModel.objects.create(
        project=project,
        name=f'Test ForecastModel1 ({"public" if is_public else "private"})',
        abbreviation='model1_abbrev',
        team_name='ForecastModel1 team',
        description="a ForecastModel for testing",
        home_url='http://example.com',
        owner=mo_user)

    # load one forecast into the first model using a small data file, reporting how long the load took
    csv_file_path = Path('forecast_app/tests/EW1-KoTsarima-2017-01-17-small.csv')  # EW01 2017
    loading_message = "* loading forecast into forecast_model={}, csv_file_path={}".format(forecast_model1,
                                                                                          csv_file_path)
    click.echo(loading_message)
    start_time = timeit.default_timer()
    forecast1 = load_cdc_csv_forecast_file(2016, forecast_model1, csv_file_path, time_zero1)
    elapsed = timeit.default_timer() - start_time
    click.echo("  loaded forecast={}. {}".format(forecast1, elapsed))

    # create the second model. team_name is intentionally not passed, which leaves its default ('')
    ForecastModel.objects.create(
        project=project,
        name=f'Test ForecastModel2 ({"public" if is_public else "private"})',
        abbreviation='model2_abbrev',
        description="a second ForecastModel for testing",
        home_url='http://example.com',
        owner=mo_user)
Exemplo n.º 5
0
    def test_load_truth_data(self):
        # loads truth into self.project and into a second project, checking row counts, the "is loaded" flag, and the
        # truth data preview
        truths_ok_path = Path('forecast_app/tests/truth_data/truths-ok.csv')
        load_truth_data(self.project, truths_ok_path, is_convert_na_none=True)
        self.assertEqual(5, truth_data_qs(self.project).count())
        self.assertTrue(is_truth_data_loaded(self.project))

        # each of these files references something that does not exist in the Project (a TimeZero, a unit, or a
        # target). the bad rows are skipped - e.g., the bad timezero 2017-01-02 is skipped by _read_truth_data_rows() -
        # so the remaining data that's loaded is a subset. this raised 'new data is a subset of previous' prior to
        # this issue:
        # [support truth "diff" uploads #319](https://github.com/reichlab/forecast-repository/issues/319), but now
        # subsets are allowed, so these loads are expected to succeed
        bad_truth_files = [
            ('forecast_app/tests/truth_data/truths-bad-timezero.csv', 'truths-bad-timezero.csv'),
            ('forecast_app/tests/truth_data/truths-bad-location.csv', 'truths-bad-location.csv'),
            ('forecast_app/tests/truth_data/truths-bad-target.csv', 'truths-bad-target.csv'),
        ]
        for bad_truth_file, file_name in bad_truth_files:
            load_truth_data(self.project, Path(bad_truth_file), file_name, is_convert_na_none=True)

        # a new project starts out with no truth data
        project2 = Project.objects.create()
        make_cdc_units_and_targets(project2)
        self.assertEqual(0, truth_data_qs(project2).count())
        self.assertFalse(is_truth_data_loaded(project2))

        TimeZero.objects.create(project=project2, timezero_date=datetime.date(2017, 1, 1))
        load_truth_data(project2, truths_ok_path, is_convert_na_none=True)
        self.assertEqual(5, truth_data_qs(project2).count())

        # test get_truth_data_preview()
        tz_date = datetime.date(2017, 1, 1)
        exp_truth_preview = [
            (tz_date, 'US National', '1 wk ahead', 0.73102),
            (tz_date, 'US National', '2 wk ahead', 0.688338),
            (tz_date, 'US National', '3 wk ahead', 0.732049),
            (tz_date, 'US National', '4 wk ahead', 0.911641),
            (tz_date, 'US National', 'Season onset', '2017-11-20'),
        ]
        act_truth_preview = get_truth_data_preview(project2)
        self.assertEqual(sorted(exp_truth_preview), sorted(act_truth_preview))
Exemplo n.º 6
0
    def test_model_score_change_forecasts(self):
        # a model's score_change.changed_at should be set when the model is created, and should move later whenever
        # one of the model's forecasts is added or deleted
        project2 = Project.objects.create()
        make_cdc_units_and_targets(project2)
        time_zero = TimeZero.objects.create(project=project2, timezero_date=datetime.date.today())
        forecast_model2 = ForecastModel.objects.create(project=project2, name='name', abbreviation='abbrev')

        # creating a new model should set its score_change.changed_at
        self.assertIsInstance(forecast_model2.score_change.changed_at, datetime.datetime)

        small_csv_path = Path('forecast_app/tests/EW1-KoTsarima-2017-01-17-small.csv')  # EW01 2017

        # adding a forecast should update its model's score_change.changed_at
        prev_changed_at = forecast_model2.score_change.changed_at
        forecast2 = load_cdc_csv_forecast_file(2016, forecast_model2, small_csv_path, time_zero)
        self.assertNotEqual(prev_changed_at, forecast_model2.score_change.changed_at)
        self.assertLess(prev_changed_at, forecast_model2.score_change.changed_at)  # was updated later

        # deleting a forecast should update its model's score_change.changed_at
        prev_changed_at = forecast_model2.score_change.changed_at
        forecast2.delete()
        self.assertNotEqual(prev_changed_at, forecast_model2.score_change.changed_at)
        self.assertLess(prev_changed_at, forecast_model2.score_change.changed_at)  # was updated later

        # bulk-deleting a model's forecasts will update its score_change.changed_at. (this basically tests that a
        # signal is used instead of a customized delete() - see set_model_changed_at() comment)
        for day_offset in (1, 2):
            newer_forecast = load_cdc_csv_forecast_file(2016, forecast_model2, small_csv_path, time_zero)
            # a newer version avoids unique constraint errors
            newer_forecast.issue_date += datetime.timedelta(days=day_offset)
            newer_forecast.save()
        prev_changed_at = forecast_model2.score_change.changed_at
        forecast_model2.forecasts.all().delete()
        self.assertNotEqual(prev_changed_at, forecast_model2.score_change.changed_at)
        self.assertLess(prev_changed_at, forecast_model2.score_change.changed_at)  # was updated later
Exemplo n.º 7
0
 def test_load_forecast_created_at_field(self):
     # a forecast loaded from a CDC CSV file should come back with its created_at field set
     project2 = Project.objects.create()
     make_cdc_units_and_targets(project2)
     today = datetime.date.today()
     time_zero2 = TimeZero.objects.create(project=project2, timezero_date=today)
     forecast_model2 = ForecastModel.objects.create(project=project2, name='name', abbreviation='abbrev')
     small_csv_path = Path('forecast_app/tests/EW1-KoTsarima-2017-01-17-small.csv')  # EW01 2017
     forecast2 = load_cdc_csv_forecast_file(2016, forecast_model2, small_csv_path, time_zero2)
     self.assertIsNotNone(forecast2.created_at)
Exemplo n.º 8
0
    def setUpTestData(cls):
        """
        Creates the shared fixtures: a project with one TimeZero (2017-01-01) and CDC units and targets, one model
        ('fm1'), and one forecast loaded into that model from a CDC CSV file.
        """
        cls.project = Project.objects.create()
        tz_date = datetime.date(2017, 1, 1)
        cls.time_zero = TimeZero.objects.create(project=cls.project, timezero_date=tz_date)
        make_cdc_units_and_targets(cls.project)

        cls.forecast_model = ForecastModel.objects.create(project=cls.project, name='fm1', abbreviation='abbrev')
        ew1_csv_path = Path('forecast_app/tests/model_error/ensemble/EW1-KoTstable-2017-01-17.csv')  # EW01 2017
        cls.forecast = load_cdc_csv_forecast_file(2016, cls.forecast_model, ew1_csv_path, cls.time_zero)
Exemplo n.º 9
0
 def setUpTestData(cls):
     """
     Creates the shared fixtures: a project with CDC units and targets, one model, one TimeZero (2017-01-01), and one
     forecast loaded from a CDC CSV file whose issued_at is then moved one day earlier.
     """
     cls.project = Project.objects.create()
     make_cdc_units_and_targets(cls.project)
     cls.forecast_model = ForecastModel.objects.create(project=cls.project, name='name', abbreviation='abbrev')
     tz_date = datetime.date(2017, 1, 1)
     cls.time_zero = TimeZero.objects.create(project=cls.project, timezero_date=tz_date)
     ew1_csv_path = Path('forecast_app/tests/model_error/ensemble/EW1-KoTstable-2017-01-17.csv')  # EW01 2017
     cls.forecast = load_cdc_csv_forecast_file(2016, cls.forecast_model, ew1_csv_path, cls.time_zero)

     # an older version avoids unique constraint errors
     one_day = datetime.timedelta(days=1)
     cls.forecast.issued_at -= one_day
     cls.forecast.save()
Exemplo n.º 10
0
    def test_load_forecasts_from_dir(self):
        # loads a directory of CDC CSV files into a model, then loads the same directory again after adding one more
        # file to check that only the new file is loaded
        project2 = Project.objects.create()
        make_cdc_units_and_targets(project2)

        # one TimeZero per file:
        #   20161023-KoTstable-20161109.cdc.csv, 20161030-KoTstable-20161114.cdc.csv, and
        #   20161106-KoTstable-20161121.cdc.csv
        timezero_dates = (datetime.date(2016, 10, 23), datetime.date(2016, 10, 30), datetime.date(2016, 11, 6))
        for timezero_date in timezero_dates:
            TimeZero.objects.create(project=project2, timezero_date=timezero_date, data_version_date=None)
        forecast_model2 = ForecastModel.objects.create(project=project2, name='name', abbreviation='abbrev')

        # copy the two files from 'forecast_app/tests/load_forecasts' to a temp dir, run the loader, and then copy a
        # third file over to test that it skips already-loaded ones
        with tempfile.TemporaryDirectory() as temp_dir_name:
            temp_dir = Path(temp_dir_name)
            test_file_dir = Path('forecast_app/tests/load_forecasts')
            for csv_name in ('20161023-KoTstable-20161109.cdc.csv', '20161030-KoTstable-20161114.cdc.csv'):
                shutil.copy(str(test_file_dir / csv_name), str(temp_dir))

            loaded_forecasts = load_cdc_csv_forecasts_from_dir(forecast_model2, temp_dir, 2016)
            self.assertEqual(2, len(loaded_forecasts))
            self.assertEqual(2, len(forecast_model2.forecasts.all()))

            # copy third file and test only new loaded
            third_file = test_file_dir / 'third-file/20161106-KoTstable-20161121.cdc.csv'
            shutil.copy(str(third_file), str(temp_dir))
            loaded_forecasts = load_cdc_csv_forecasts_from_dir(forecast_model2, temp_dir, 2016)
            self.assertEqual(1, len(loaded_forecasts))
Exemplo n.º 11
0
    def test_load_forecast(self):
        # checks the forecast loaded by the class-level setup, then checks that loading fails with a RuntimeError for
        # an empty file, a bad header, and a TimeZero that is not in the model's project
        self.assertEqual(1, len(self.forecast_model.forecasts.all()))
        self.assertIsInstance(self.forecast, Forecast)
        self.assertEqual('EW1-KoTstable-2017-01-17.csv', self.forecast.source)
        exp_num_pred_eles = 11 * 7 * 2  # locations * targets * points/bins
        self.assertEqual(exp_num_pred_eles, self.forecast.pred_eles.count())

        # check 'US National' targets: spot-check a few point rows
        act_points_qs = self.forecast.pred_eles.filter(unit__name='US National',
                                                       pred_class=PredictionElement.POINT_CLASS)
        self.assertEqual(7, act_points_qs.count())

        # test bad files, each paired with the text expected in the error message: an empty file, and a bad data file
        # header
        bad_file_to_exp_message = [
            ('forecast_app/tests/EW1-bad_file_no_header-2017-01-17.csv', 'empty file'),  # EW01 2017?
            ('forecast_app/tests/EW1-bad_file_header-2017-01-17.csv', 'invalid header'),  # EW01 2017?
        ]
        for bad_file, exp_message in bad_file_to_exp_message:
            with self.assertRaises(RuntimeError) as context:
                load_cdc_csv_forecast_file(2016, self.forecast_model, Path(bad_file), self.time_zero)
            self.assertIn(exp_message, str(context.exception))

        # test load_forecast() with timezero not in the project
        project2 = Project.objects.create()  # no TimeZeros
        make_cdc_units_and_targets(project2)
        forecast_model2 = ForecastModel.objects.create(project=project2, name='name', abbreviation='abbrev')
        ew1_csv_path = Path('forecast_app/tests/model_error/ensemble/EW1-KoTstable-2017-01-17.csv')  # EW01 2017
        with self.assertRaises(RuntimeError) as context:
            load_cdc_csv_forecast_file(2016, forecast_model2, ew1_csv_path, self.time_zero)
        self.assertIn("time_zero was not in project", str(context.exception))
    def setUpTestData(cls):
        """
        Creates a project with CDC units and targets, one model, and four forecasts (cls.forecast1 through
        cls.forecast4) loaded from CDC CSV files, each with its own TimeZero.
        """
        cls.project = Project.objects.create()
        make_cdc_units_and_targets(cls.project)

        cls.forecast_model = ForecastModel.objects.create(project=cls.project, name='name', abbreviation='abbrev')

        # one row per forecast: (CDC CSV file, MMWR (year, week) of its TimeZero, class attribute to set)
        csv_file_to_mmwr_week = [
            ('forecast_app/tests/model_error/ensemble/EW1-KoTstable-2017-01-17.csv', (2017, 1), 'forecast1'),
            ('forecast_app/tests/model_error/ensemble/EW2-KoTstable-2017-01-23.csv', (2017, 2), 'forecast2'),
            ('forecast_app/tests/model_error/ensemble/EW51-KoTstable-2017-01-03.csv', (2016, 51), 'forecast3'),
            ('forecast_app/tests/model_error/ensemble/EW52-KoTstable-2017-01-09.csv', (2016, 52), 'forecast4'),
        ]
        for csv_file, (ew_year, ew_week), attr_name in csv_file_to_mmwr_week:
            timezero_date = pymmwr.mmwr_week_to_date(ew_year, ew_week)
            time_zero = TimeZero.objects.create(project=cls.project, timezero_date=timezero_date)
            setattr(cls, attr_name,
                    load_cdc_csv_forecast_file(2016, cls.forecast_model, Path(csv_file), time_zero))
 def test_d3_foresight_larger(self):
     # compares flusight_unit_to_data_dict() output for a project with two models against an expected JSON file
     project = Project.objects.create()
     make_cdc_units_and_targets(project)

     # one TimeZero per forecast file:
     #   20161023-KoTstable-20161109.cdc.csv {'year': 2016, 'week': 43, 'day': 1}
     #   20161030-KoTstable-20161114.cdc.csv {'year': 2016, 'week': 44, 'day': 1}
     #   20161106-KoTstable-20161121.cdc.csv {'year': 2016, 'week': 45, 'day': 1}
     for timezero_date in (datetime.date(2016, 10, 23), datetime.date(2016, 10, 30), datetime.date(2016, 11, 6)):
         TimeZero.objects.create(project=project, timezero_date=timezero_date, data_version_date=None)

     forecast_model1 = ForecastModel.objects.create(project=project, name='forecast_model1',
                                                    abbreviation='model1')
     forecast_model2 = ForecastModel.objects.create(project=project, name='forecast_model2',
                                                    abbreviation='model2')
     forecast_dir = Path('forecast_app/tests/load_forecasts')
     load_cdc_csv_forecasts_from_dir(forecast_model1, forecast_dir, 2016)
     load_cdc_csv_forecasts_from_dir(forecast_model2, forecast_dir / 'third-file', 2016)

     # the expected JSON file is rendered as a Django template, with the two models' ids passed in as context
     with open('forecast_app/tests/EW1-KoTsarima-2017-01-17-small-exp-flusight-data.json', 'r') as fp:
         exp_json_template = Template(fp.read())
         model_ids_context = Context({
             'forecast_model1_id': forecast_model1.id,
             'forecast_model2_id': forecast_model2.id,
         })
         exp_flusight_data_dict = json.loads(exp_json_template.render(model_ids_context))
         act_flusight_data_dict = flusight_unit_to_data_dict(project, None)
         self.assertEqual(exp_flusight_data_dict, act_flusight_data_dict)
 def test_d3_foresight_out_of_season(self):
     # compares flusight_unit_to_data_dict() output for season '2017', which has no forecast data, against an
     # expected JSON file
     project = Project.objects.create()
     make_cdc_units_and_targets(project)

     # season '2016' starts at pymmwr.mmwr_week_to_date(2016, 29) -> datetime.date(2016, 7, 17). note that
     # 29 < SEASON_START_EW_NUMBER
     time_zero = TimeZero.objects.create(project=project, timezero_date=datetime.date(2016, 7, 17),
                                         data_version_date=None, is_season_start=True, season_name='2016')

     # season '2017' has no forecast data. it starts at:
     # 20161030-KoTstable-20161114.cdc.csv {'year': 2016, 'week': 44, 'day': 1} -> datetime.date(2016, 10, 30)
     TimeZero.objects.create(project=project, timezero_date=datetime.date(2016, 10, 30),
                             data_version_date=None, is_season_start=True, season_name='2017')

     forecast_model = ForecastModel.objects.create(project=project, name='forecast_model1 name',
                                                   abbreviation='abbrev')
     small_csv_path = Path('forecast_app/tests/EW1-KoTsarima-2017-01-17-small.csv')  # EW01 2017
     load_cdc_csv_forecast_file(2016, forecast_model, small_csv_path, time_zero)

     # the expected JSON file is rendered as a Django template, with the model's id passed in as context
     with open('forecast_app/tests/EW1-KoTsarima-2017-01-17-small-exp-flusight-no-points.json', 'r') as fp:
         exp_json_template = Template(fp.read())
         model_id_context = Context({'forecast_model_id': forecast_model.id})
         exp_flusight_data_dict = json.loads(exp_json_template.render(model_id_context))
         act_flusight_data_dict = flusight_unit_to_data_dict(project, '2017')
         self.assertEqual(exp_flusight_data_dict, act_flusight_data_dict)
Exemplo n.º 15
0
    def test_load_predictions_from_cdc_csv_file(self):
        # sanity check: the CDC CSV file at self.cdc_csv_path is converted to a json_io_dict, which is then loaded
        # into the database as the predictions of a newly-created Forecast
        project = Project.objects.create()
        make_cdc_units_and_targets(project)

        model = ForecastModel.objects.create(project=project, name='model', abbreviation='abbrev')
        timezero = TimeZero.objects.create(project=project, timezero_date=datetime.date(2017, 1, 1))
        new_forecast = Forecast.objects.create(forecast_model=model, time_zero=timezero)

        with open(self.cdc_csv_path) as csv_fp:
            predictions_dict = json_io_dict_from_cdc_csv_file(2011, csv_fp)
            load_predictions_from_json_io_dict(new_forecast, predictions_dict, is_validate_cats=False)

        exp_num_pred_eles = 1 * 7 * 2  # locations * targets * points/bins
        self.assertEqual(exp_num_pred_eles, new_forecast.pred_eles.count())
    def test_d3_foresight(self):
        # compares flusight_unit_to_data_dict() output for a project with one model and one loaded forecast against
        # an expected JSON file
        project = Project.objects.create()
        make_cdc_units_and_targets(project)

        # 20161023-KoTstable-20161109.cdc.csv {'year': 2016, 'week': 43, 'day': 1}. the data_version_date
        # -> outputs dataVersionTime
        time_zero = TimeZero.objects.create(project=project, timezero_date=datetime.date(2016, 10, 23),
                                            data_version_date=datetime.date(2016, 10, 22))
        # 20161030-KoTstable-20161114.cdc.csv {'year': 2016, 'week': 44, 'day': 1}
        TimeZero.objects.create(project=project, timezero_date=datetime.date(2016, 10, 30),
                                data_version_date=datetime.date(2016, 10, 29))

        forecast_model1 = ForecastModel.objects.create(project=project, name='forecast_model1 name',
                                                       abbreviation='abbrev')
        small_csv_path = Path('forecast_app/tests/EW1-KoTsarima-2017-01-17-small.csv')  # EW01 2017
        load_cdc_csv_forecast_file(2016, forecast_model1, small_csv_path, time_zero)

        # we treat the json file as a Django template b/c model IDs are hard-coded, but can vary depending on the
        # RDBMS
        with open('forecast_app/tests/EW1-KoTsarima-2017-01-17-small-exp-flusight.json', 'r') as fp:
            exp_json_template = Template(fp.read())
            model_id_context = Context({'forecast_model_id': forecast_model1.id})
            exp_flusight_data_dict = json.loads(exp_json_template.render(model_id_context))
            act_flusight_data_dict = flusight_unit_to_data_dict(project, None)
            self.assertEqual(exp_flusight_data_dict, act_flusight_data_dict)
Exemplo n.º 17
0
    def test__tz_unit_targ_pks_to_truth_values(self):
        """
        Check that _tz_unit_targ_pks_to_truth_values() returns the expected nested dict for a project that has
        a single TimeZero with truth loaded: {timezero_pk: {unit_pk: {target_pk: [truth_value]}}}.
        """
        # setup
        project = Project.objects.create()
        make_cdc_units_and_targets(project)

        # load truth only for the TimeZero in truths-2016-2017-reichlab.csv we're testing against
        time_zero = TimeZero.objects.create(
            project=project,
            timezero_date=datetime.date(2017, 1, 1),
            is_season_start=True,
            season_name='season1')
        truth_csv_path = Path(
            'utils/ensemble-truth-table-script/truths-2016-2017-reichlab.csv')
        load_truth_data(project, truth_csv_path)

        forecast_model = ForecastModel.objects.create(
            project=project, name='test model', abbreviation='abbrev')
        forecast_csv_path = Path(
            'forecast_app/tests/EW1-KoTsarima-2017-01-17-small.csv')  # EW01 2017
        load_cdc_csv_forecast_file(2016, forecast_model, forecast_csv_path,
                                   time_zero)

        # test. the column order of each truth tuple below follows target_names
        target_names = ('Season onset', 'Season peak week',
                        'Season peak percentage', '1 wk ahead', '2 wk ahead',
                        '3 wk ahead', '4 wk ahead')
        target_pks = [
            Target.objects.filter(project=project, name=target_name).first().pk
            for target_name in target_names
        ]

        # unit name -> one truth value per target. note that 'Season onset' values are expected as strings
        # whereas 'Season peak week' values are expected as datetime.date objects
        unit_name_to_truths = {
            'HHS Region 1': ('2016-12-25', datetime.date(2017, 2, 5),
                             3.19221, 1.52411, 1.73987, 2.06524, 2.51375),
            'HHS Region 2': ('2016-11-20', datetime.date(2017, 2, 5),
                             6.93759, 5.07086, 5.68166, 6.01053, 6.49829),
            'HHS Region 3': ('2016-12-18', datetime.date(2017, 2, 12),
                             5.20003, 2.81366, 3.09968, 3.45232, 3.73339),
            'HHS Region 4': ('2016-11-13', datetime.date(2017, 2, 12),
                             5.5107, 2.89395, 3.68564, 3.69188, 4.53169),
            'HHS Region 5': ('2016-12-25', datetime.date(2017, 2, 12),
                             4.31787, 2.11757, 2.4432, 2.76295, 3.182),
            'HHS Region 6': ('2017-01-08', datetime.date(2017, 2, 5),
                             9.87589, 4.80185, 5.26955, 6.10427, 8.13221),
            'HHS Region 7': ('2016-12-25', datetime.date(2017, 2, 5),
                             6.35948, 2.75581, 3.46528, 4.56991, 5.52653),
            'HHS Region 8': ('2016-12-18', datetime.date(2017, 2, 12),
                             2.72703, 1.90851, 2.2668, 2.07104, 2.27632),
            'HHS Region 9': ('2016-12-18', datetime.date(2016, 12, 25),
                             3.30484, 2.83778, 2.68071, 2.9577, 3.03987),
            'HHS Region 10': ('2016-12-11', datetime.date(2016, 12, 25),
                              3.67061, 2.15197, 3.25108, 2.51434, 2.28634),
            'US National': ('2016-12-11', datetime.date(2017, 2, 5),
                            5.06094, 3.07623, 3.50708, 3.79872, 4.43601),
        }

        # {timezero_pk: {unit_pk: {target_id: truth_value}}}, where each truth_value is wrapped in a list
        exp_unit_pk_to_truths = {}
        for unit_name, truth_values in unit_name_to_truths.items():
            unit_pk = Unit.objects.filter(project=project,
                                          name=unit_name).first().pk
            exp_unit_pk_to_truths[unit_pk] = {
                target_pk: [truth_value]
                for target_pk, truth_value in zip(target_pks, truth_values)
            }
        exp_dict = {time_zero.pk: exp_unit_pk_to_truths}

        act_dict = _tz_unit_targ_pks_to_truth_values(forecast_model.project)
        self.assertEqual(exp_dict, act_dict)
Exemplo n.º 18
0
    def test_load_forecast(self):
        """
        Sanity-check the forecast loaded by the fixture (source, row count, 'US National' point rows), then
        check that load_cdc_csv_forecast_file() raises RuntimeError for bad files and for a TimeZero that is
        not in the model's project.
        """
        loaded_forecasts = self.forecast_model.forecasts.all()
        self.assertEqual(1, len(loaded_forecasts))
        self.assertIsInstance(self.forecast, Forecast)
        self.assertEqual('EW1-KoTstable-2017-01-17.csv', self.forecast.source)
        self.assertEqual(8019,
                         self.forecast.get_num_rows())  # excluding header

        # check 'US National' targets: spot-check a few point rows. each expected row has exactly one
        # populated value column; the others are None
        value_columns = ('value_i', 'value_f', 'value_t', 'value_d', 'value_b')
        exp_target_column_values = [  # (target name, populated column, value), sorted by target name
            ('1 wk ahead', 'value_f', 3.00101461253164),
            ('2 wk ahead', 'value_f', 2.72809349594878),
            ('3 wk ahead', 'value_f', 2.5332588357381),
            ('4 wk ahead', 'value_f', 2.42985946508278),
            ('Season onset', 'value_t', '2016-12-12'),
            ('Season peak percentage', 'value_f', 3.30854920241938),
            ('Season peak week', 'value_d', datetime.date(2017, 1, 30)),
        ]
        exp_points = [
            ('US National', target_name) + tuple(
                value if column == populated_column else None
                for column in value_columns)
            for target_name, populated_column, value in exp_target_column_values
        ]
        act_points_qs = (
            self.forecast.point_prediction_qs()
            .filter(unit__name='US National')
            .order_by('unit__name', 'target__name')
            .values_list('unit__name', 'target__name', *value_columns))
        self.assertEqual(exp_points, list(act_points_qs))

        # test an empty file, then a bad data file header. both must raise with the matching message
        bad_file_to_exp_message = (
            ('forecast_app/tests/EW1-bad_file_no_header-2017-01-17.csv',
             'empty file'),  # EW01 2017?
            ('forecast_app/tests/EW1-bad_file_header-2017-01-17.csv',
             'invalid header'),  # EW01 2017?
        )
        for bad_file, exp_message in bad_file_to_exp_message:
            with self.assertRaises(RuntimeError) as raised:
                load_cdc_csv_forecast_file(2016, self.forecast_model,
                                           Path(bad_file), self.time_zero)
            self.assertIn(exp_message, str(raised.exception))

        # test load_forecast() with timezero not in the project
        project2 = Project.objects.create()  # no TimeZeros
        make_cdc_units_and_targets(project2)
        forecast_model2 = ForecastModel.objects.create(
            project=project2, name='name', abbreviation='abbrev')
        ensemble_csv_path = Path(
            'forecast_app/tests/model_error/ensemble/EW1-KoTstable-2017-01-17.csv'
        )  # EW01 2017
        with self.assertRaises(RuntimeError) as raised:
            load_cdc_csv_forecast_file(2016, forecast_model2,
                                       ensemble_csv_path, self.time_zero)
        self.assertIn("time_zero was not in project", str(raised.exception))
Exemplo n.º 19
0
    def test_load_truth_data_other_files(self):
        """
        Load two truth files via load_truth_data() and check the rows stored under each project's oracle
        model, as returned by oracle_model_for_project().
        """

        def oracle_truth_rows(project):
            # returns (timezero_date, unit name, target name, truth value) tuples for project's oracle model.
            # note: https://code.djangoproject.com/ticket/32483 sqlite3 json query bug -> we manually access
            # field instead of using 'data__value'
            oracle_data_qs = PredictionData.objects.filter(
                pred_ele__forecast__forecast_model=oracle_model_for_project(project))
            field_tuples = oracle_data_qs.values_list(
                'pred_ele__forecast__time_zero__timezero_date',
                'pred_ele__unit__name', 'pred_ele__target__name', 'data')
            return [(timezero_date, unit_name, target_name, data['value'])
                    for timezero_date, unit_name, target_name, data in field_tuples]

        # test truth files that used to be in yyyymmdd or yyyyww (EW) formats
        # truths-ok.csv (2017-01-17-truths.csv would basically test the same)
        load_truth_data(self.project,
                        Path('forecast_app/tests/truth_data/truths-ok.csv'),
                        is_convert_na_none=True)
        timezero_date_1 = datetime.date(2017, 1, 1)
        exp_rows = [
            (timezero_date_1, 'US National', target_name, truth_value)
            for target_name, truth_value in (('1 wk ahead', 0.73102),
                                             ('2 wk ahead', 0.688338),
                                             ('3 wk ahead', 0.732049),
                                             ('4 wk ahead', 0.911641),
                                             ('Season onset', '2017-11-20'))
        ]
        self.assertEqual(sorted(exp_rows),
                         sorted(oracle_truth_rows(self.project)))

        # truths-2016-2017-reichlab-small.csv
        project2 = Project.objects.create()
        timezero_date_2 = datetime.date(2016, 10, 30)
        TimeZero.objects.create(project=project2,
                                timezero_date=timezero_date_2)
        make_cdc_units_and_targets(project2)
        small_truth_path = Path(
            'forecast_app/tests/truth_data/truths-2016-2017-reichlab-small.csv')
        load_truth_data(project2, small_truth_path, is_convert_na_none=True)
        exp_rows = [
            (timezero_date_2, 'US National', target_name, truth_value)
            for target_name, truth_value in (('1 wk ahead', 1.55838),
                                             ('2 wk ahead', 1.64639),
                                             ('3 wk ahead', 1.91196),
                                             ('4 wk ahead', 1.81129),
                                             ('Season onset', '2016-12-11'),
                                             ('Season peak percentage', 5.06094),
                                             ('Season peak week', '2017-02-05'))
        ]
        self.assertEqual(sorted(exp_rows), sorted(oracle_truth_rows(project2)))
Exemplo n.º 20
0
    def test_mae(self):
        """
        Check 'abs_error' score values and the per-unit mean absolute error tables.

        Loads the forecasts under forecast_app/tests/load_forecasts and the truths-2016-2017-reichlab.csv
        truth file into a fresh project, updates the 'abs_error' score for the model, and then spot-checks
        'HHS Region 1' in the output of both _score_value_rows_for_season() and
        unit_to_mean_abs_error_rows_for_project().
        """
        project2 = Project.objects.create()
        make_cdc_units_and_targets(project2)
        TimeZero.objects.create(project=project2,
                                timezero_date=datetime.date(2016, 10, 23),
                                is_season_start=True,
                                season_name='s1')
        TimeZero.objects.create(project=project2,
                                timezero_date=datetime.date(2016, 10, 30))
        TimeZero.objects.create(project=project2,
                                timezero_date=datetime.date(2016, 11, 6))
        forecast_model2 = ForecastModel.objects.create(project=project2,
                                                       name='name',
                                                       abbreviation='abbrev')
        load_cdc_csv_forecasts_from_dir(
            forecast_model2, Path('forecast_app/tests/load_forecasts'), 2016)
        load_truth_data(
            project2,
            Path(
                'utils/ensemble-truth-table-script/truths-2016-2017-reichlab.csv'
            ))

        Score.ensure_all_scores_exist()
        score = Score.objects.filter(
            abbreviation='abs_error').first()  # hard-coded official abbrev
        score.update_score_for_model(forecast_model2)

        score_value_rows_for_season = _score_value_rows_for_season(
            project2, 's1')
        self.assertEqual(
            5 * 11, len(score_value_rows_for_season))  # 5 targets * 11 units

        # spot-check a unit. each row's first item is compared against the unit's id and its last item is
        # taken as the score value
        exp_maes = [
            0.1830079332082548, 0.127335480231265, 0.040631614561185525,
            0.09119562794624952, 0.15125133156909953
        ]
        hhs1_loc = project2.units.filter(name='HHS Region 1').first()
        hhs1_loc_rows = [
            row for row in score_value_rows_for_season
            if row[0] == hhs1_loc.id
        ]
        act_maes = [row[-1] for row in hhs1_loc_rows]
        # zip() stops at the shorter input, so without this check the loop below would pass vacuously if the
        # unit filter matched too few (or no) rows
        self.assertEqual(len(exp_maes), len(act_maes))
        for exp_mae, act_mae in zip(exp_maes, act_maes):
            self.assertAlmostEqual(exp_mae, act_mae)

        # test unit_to_mean_abs_error_rows_for_project(), since we have a nice fixture
        loc_to_mae_rows_no_season = unit_to_mean_abs_error_rows_for_project(
            project2, None)
        self.assertEqual(loc_to_mae_rows_no_season,
                         unit_to_mean_abs_error_rows_for_project(
                             project2, 's1'))  # season_name shouldn't matter
        self.assertEqual(set(project2.units.values_list('name', flat=True)),
                         set(loc_to_mae_rows_no_season))

        exp_rows = [
            [
                'Model', '1 wk ahead', '2 wk ahead', '3 wk ahead',
                '4 wk ahead', 'Season peak percentage'
            ],
            [
                forecast_model2.pk, 0.127335480231265, 0.040631614561185525,
                0.09119562794624952, 0.15125133156909953, 0.1830079332082548
            ],
        ]
        act_rows = loc_to_mae_rows_no_season[hhs1_loc.name][0]
        self.assertEqual(exp_rows[0], act_rows[0])  # header
        self.assertEqual(exp_rows[1][0], act_rows[1][0])  # model
        # remaining columns are floats: '1 wk ahead' through 'Season peak percentage'
        for col_idx, exp_value in enumerate(exp_rows[1][1:], start=1):
            self.assertAlmostEqual(exp_value, act_rows[1][col_idx])

        target_spp = project2.targets.filter(
            name='Season peak percentage').first()
        target_1wk = project2.targets.filter(name='1 wk ahead').first()
        target_2wk = project2.targets.filter(name='2 wk ahead').first()
        target_3wk = project2.targets.filter(name='3 wk ahead').first()
        target_4wk = project2.targets.filter(name='4 wk ahead').first()
        exp_loc_to_min = {
            target_spp: 0.1830079332082548,
            target_1wk: 0.127335480231265,
            target_2wk: 0.040631614561185525,
            target_3wk: 0.09119562794624952,
            target_4wk: 0.15125133156909953
        }
        # the expected dict is keyed by Target objects, but the actual one is looked up by target name
        act_loc_to_min = loc_to_mae_rows_no_season[hhs1_loc.name][1]
        for target, exp_min in exp_loc_to_min.items():
            self.assertAlmostEqual(exp_min, act_loc_to_min[target.name])