Exemplo n.º 1
0
def _make_docs_project(user):
    """
    Build and return the example "docs" project.

    Any pre-existing project named DOCS_PROJECT_NAME is deleted first. A fresh
    project is created from docs-project.json, its ground truth is loaded, and a
    single forecast model with one forecast (from docs-predictions.json) is added.

    :param user: the User that will own the new project
    :return: 4-tuple: (project, time_zero, forecast_model, forecast)
    """
    previous_project = Project.objects.filter(name=DOCS_PROJECT_NAME).first()
    if previous_project is not None:
        click.echo("* deleting previous project: {}".format(previous_project))
        previous_project.delete()

    docs_project = create_project_from_json(
        Path('forecast_app/tests/projects/docs-project.json'), user)  # atomic
    docs_project.name = DOCS_PROJECT_NAME
    docs_project.save()

    load_truth_data(
        docs_project, Path('forecast_app/tests/truth_data/docs-ground-truth.csv'))

    docs_model = ForecastModel.objects.create(project=docs_project,
                                              name='docs forecast model',
                                              abbreviation='docs_mod')
    # the docs project json declares a timezero for 2011-10-02; attach the forecast there
    docs_time_zero = docs_project.timezeros.filter(
        timezero_date=datetime.date(2011, 10, 2)).first()
    docs_forecast = Forecast.objects.create(forecast_model=docs_model,
                                            source='docs-predictions.json',
                                            time_zero=docs_time_zero,
                                            notes="a small prediction file")
    with open('forecast_app/tests/predictions/docs-predictions.json') as json_fp:
        predictions_dict = json.load(json_fp)
        load_predictions_from_json_io_dict(docs_forecast, predictions_dict,
                                           False)  # atomic
        cache_forecast_metadata(docs_forecast)  # atomic

    return docs_project, docs_time_zero, docs_model, docs_forecast
    def test_cache_forecast_metadata_predictions(self):
        """Caching creates exactly one ForecastMetaPrediction row with the expected counts."""
        def prediction_row_count():
            return ForecastMetaPrediction.objects.filter(forecast=self.forecast).count()

        self.assertEqual(0, prediction_row_count())

        cache_forecast_metadata(self.forecast)
        self.assertEqual(1, prediction_row_count())

        meta_prediction = ForecastMetaPrediction.objects \
            .filter(forecast=self.forecast).first()  # only one row
        for exp_count, act_count in [(11, meta_prediction.point_count),
                                     (2, meta_prediction.named_count),
                                     (16, meta_prediction.bin_count),
                                     (23, meta_prediction.sample_count),
                                     (10, meta_prediction.quantile_count)]:
            self.assertEqual(exp_count, act_count)

        # second run first deletes existing rows, resulting in the same number as before
        cache_forecast_metadata(self.forecast)
        self.assertEqual(1, prediction_row_count())
def update(project_pk, no_enqueue):
    """
    A subcommand that updates forecast metadata for one or all projects.

    :param project_pk: if a valid Project pk then only that project's metadata is updated. o/w updates all
    :param no_enqueue: controls whether the update will be immediate in the calling thread (blocks), or enqueued for RQ
    """
    from forecast_repo.settings.base import CACHE_FORECAST_METADATA_QUEUE_NAME  # avoid circular imports

    rq_queue = django_rq.get_queue(CACHE_FORECAST_METADATA_QUEUE_NAME)
    if project_pk:
        projects_to_update = [get_object_or_404(Project, pk=project_pk)]
    else:
        projects_to_update = Project.objects.all()
    print("updating metadata")
    for a_project in projects_to_update:
        print(f"* {a_project}")
        for a_model in a_project.models.all():
            print(f"- {a_model}")
            for a_forecast in a_model.forecasts.all():
                if no_enqueue:
                    # blocking: cache in the calling thread
                    print(f"  = caching metadata (no enqueue): {a_forecast}")
                    cache_forecast_metadata(a_forecast)
                else:
                    # non-blocking: hand the forecast pk to the RQ worker
                    print(f"  = enqueuing caching metadata: {a_forecast}")
                    rq_queue.enqueue(_cache_forecast_metadata_worker, a_forecast.pk)
    print("update done")
Exemplo n.º 4
0
def load_cdc_csv_forecast_file(season_start_year, forecast_model,
                               cdc_csv_file_path, time_zero):
    """
    Creates a new Forecast in `forecast_model` for `time_zero`, loading it from a CDC CSV file. NB: does not check
    whether a Forecast already exists for time_zero and file_name. Is atomic so that an invalid forecast's data is
    not saved.

    :param season_start_year
    :param forecast_model: the ForecastModel to create the new Forecast in
    :param cdc_csv_file_path: string or Path to a CDC CSV forecast file. the CDC CSV file format is documented at
        https://predict.cdc.gov/api/v1/attachments/flusight/flu_challenge_2016-17_update.docx
    :param time_zero: the TimeZero this forecast applies to
    :return returns a new Forecast for it
    :raises RuntimeError: if the data could not be loaded
    """
    # guard: the forecast's timezero must belong to the model's project
    if time_zero not in forecast_model.project.timezeros.all():
        raise RuntimeError(
            f"time_zero was not in project. time_zero={time_zero}, "
            f"project timezeros={forecast_model.project.timezeros.all()}")

    csv_path = Path(cdc_csv_file_path)
    new_forecast = Forecast.objects.create(forecast_model=forecast_model,
                                           time_zero=time_zero,
                                           source=csv_path.name)
    with open(csv_path) as csv_fp:
        predictions_dict = json_io_dict_from_cdc_csv_file(season_start_year,
                                                          csv_fp)
        load_predictions_from_json_io_dict(new_forecast,
                                           predictions_dict,
                                           is_validate_cats=False)  # atomic
        cache_forecast_metadata(new_forecast)  # atomic
    return new_forecast
    def test_cache_forecast_metadata_second_forecast(self):
        """Caching metadata for one forecast must not create rows for any other forecast."""
        # make a second (uncached) forecast alongside self.forecast
        forecast2 = Forecast.objects.create(forecast_model=self.forecast_model,
                                            source='docs-predictions.json',
                                            time_zero=self.time_zero,
                                            notes="a small prediction file")
        with open(
                'forecast_app/tests/predictions/docs-predictions.json') as fp:
            load_predictions_from_json_io_dict(forecast2, json.load(fp), False)

        meta_classes = [ForecastMetaPrediction, ForecastMetaUnit, ForecastMetaTarget]

        # before caching: no metadata rows for either forecast
        for meta_class in meta_classes:
            self.assertEqual(
                0, meta_class.objects.filter(forecast=self.forecast).count())
            self.assertEqual(
                0, meta_class.objects.filter(forecast=forecast2).count())

        cache_forecast_metadata(self.forecast)

        # after caching self.forecast: rows exist only for it, none for forecast2
        for meta_class, exp_count in zip(meta_classes, [1, 3, 5]):
            self.assertEqual(
                exp_count,
                meta_class.objects.filter(forecast=self.forecast).count())
        for meta_class in meta_classes:
            self.assertEqual(
                0, meta_class.objects.filter(forecast=forecast2).count())
Exemplo n.º 6
0
    def test_cache_forecast_metadata_targets(self):
        """Caching creates one ForecastMetaTarget row per project target; re-caching replaces them."""
        target_meta_qs = ForecastMetaTarget.objects.filter(forecast=self.forecast)  # lazy - re-evaluated per count()
        self.assertEqual(0, target_meta_qs.count())

        cache_forecast_metadata(self.forecast)
        self.assertEqual(5, target_meta_qs.count())
        cached_targets = {meta_row.target for meta_row in target_meta_qs}
        self.assertEqual(set(self.project.targets.all()), cached_targets)

        # second run first deletes existing rows, resulting in the same number as before
        cache_forecast_metadata(self.forecast)
        self.assertEqual(5, target_meta_qs.count())
Exemplo n.º 7
0
    def test_cache_forecast_metadata_clears_first(self):
        """Caching is idempotent (old rows are deleted first), and clear_forecast_metadata() empties all tables."""
        def meta_counts():
            # (prediction, unit, target) row counts for self.forecast
            return (ForecastMetaPrediction.objects.filter(forecast=self.forecast).count(),
                    ForecastMetaUnit.objects.filter(forecast=self.forecast).count(),
                    ForecastMetaTarget.objects.filter(forecast=self.forecast).count())

        self.assertEqual((0, 0, 0), meta_counts())

        # first run creates rows, second run first deletes existing rows, resulting in the same number as before
        for _ in range(2):
            cache_forecast_metadata(self.forecast)
            self.assertEqual((1, 3, 5), meta_counts())

        clear_forecast_metadata(self.forecast)
        self.assertEqual((0, 0, 0), meta_counts())
Exemplo n.º 8
0
    def test_forecast_metadata_counts_for_f_ids(self):
        """forecast_metadata_counts_for_f_ids() returns per-forecast count lists keyed by forecast id."""
        second_forecast = Forecast.objects.create(forecast_model=self.forecast_model, source='docs-predictions-non-dup.json',
                                                  time_zero=self.time_zero, notes="a small prediction file")
        with open('forecast_app/tests/predictions/docs-predictions-non-dup.json') as pred_fp:
            load_predictions_from_json_io_dict(second_forecast, json.load(pred_fp), is_validate_cats=False)
        cache_forecast_metadata(self.forecast)
        cache_forecast_metadata(second_forecast)

        forecast_id_to_counts = forecast_metadata_counts_for_f_ids(self.forecast_model.forecasts.all())
        #  f_id:  [(point_count, named_count, bin_count, sample_count, quantile_count), num_names, num_targets]
        # {   4:  [(11,          2,           6,         7,            3),              3,         5          ],
        #     5:  [(11,          2,           6,         7,            3),              3,         5          ]}
        self.assertEqual(sorted([self.forecast.id, second_forecast.id]), sorted(forecast_id_to_counts.keys()))
        exp_counts = [(11, 2, 6, 7, 3), 3, 5]  # both forecasts load the same predictions file shape
        self.assertEqual(exp_counts, forecast_id_to_counts[self.forecast.id])
        self.assertEqual(exp_counts, forecast_id_to_counts[second_forecast.id])
Exemplo n.º 9
0
    def test_metadata_for_forecast(self):
        """forecast_metadata() returns the cached prediction row plus unit and target querysets."""
        cache_forecast_metadata(self.forecast)
        meta_prediction, meta_unit_qs, meta_target_qs = forecast_metadata(self.forecast)

        self.assertIsInstance(meta_prediction, ForecastMetaPrediction)
        for exp_count, act_count in [(11, meta_prediction.point_count),
                                     (2, meta_prediction.named_count),
                                     (6, meta_prediction.bin_count),
                                     (7, meta_prediction.sample_count),
                                     (3, meta_prediction.quantile_count)]:
            self.assertEqual(exp_count, act_count)

        # each queryset contains only rows of its own meta model type
        for meta_qs, exp_len, exp_type in [(meta_unit_qs, 3, ForecastMetaUnit),
                                           (meta_target_qs, 5, ForecastMetaTarget)]:
            self.assertIsInstance(meta_qs, QuerySet)
            self.assertEqual(exp_len, len(meta_qs))
            self.assertEqual({exp_type}, set(map(type, meta_qs)))
Exemplo n.º 10
0
    def test_target_rows_for_project(self):
        """
        Exercises target_rows_for_project() across four scenarios: one model with one forecast covering all
        five targets; a second forecast at a newer timezero (only the newest forecast's rows appear); a second
        model whose only forecast covers a single target; and finally a project with all forecasts deleted.
        Rows are compared order-insensitively by sorting on the model's id.
        """
        _, _, po_user, _, _, _, _, _ = get_or_create_super_po_mo_users(
            is_create_super=True)
        # recall that _make_docs_project() calls cache_forecast_metadata():
        project, time_zero, forecast_model, forecast = _make_docs_project(
            po_user)  # 2011, 10, 2

        # case: one model with one timezero that has five groups of one target each.
        # recall: `group_targets(project.targets.all())` (only one target/group in this case):
        #   {'pct next week':    [(1, 'pct next week', 'continuous', True, 1, 'percent')],
        #    'cases next week':  [(2, 'cases next week', 'discrete', True, 2, 'cases')],
        #    'season severity':  [(3, 'season severity', 'nominal', False, None, None)],
        #    'above baseline':   [(4, 'above baseline', 'binary', False, None, None)],
        #    'Season peak week': [(5, 'Season peak week', 'date', False, None, 'week')]}
        # expected row shape: (model, timezero_date, forecast_id, target_name, count)
        exp_rows = [(forecast_model, str(time_zero.timezero_date), forecast.id,
                     'Season peak week', 1),
                    (forecast_model, str(time_zero.timezero_date), forecast.id,
                     'above baseline', 1),
                    (forecast_model, str(time_zero.timezero_date), forecast.id,
                     'cases next week', 1),
                    (forecast_model, str(time_zero.timezero_date), forecast.id,
                     'pct next week', 1),
                    (forecast_model, str(time_zero.timezero_date), forecast.id,
                     'season severity', 1)]
        act_rows = [(row[0], str(row[1]), row[2], row[3], row[4])
                    for row in target_rows_for_project(project)]
        self.assertEqual(sorted(exp_rows, key=lambda _: _[0].id),
                         sorted(act_rows, key=lambda _: _[0].id))

        # case: add a second forecast for a newer timezero
        time_zero2 = TimeZero.objects.create(project=project,
                                             timezero_date=datetime.date(
                                                 2011, 10, 3))
        forecast2 = Forecast.objects.create(forecast_model=forecast_model,
                                            source='docs-predictions.json',
                                            time_zero=time_zero2,
                                            notes="a small prediction file")
        with open(
                'forecast_app/tests/predictions/docs-predictions.json') as fp:
            json_io_dict_in = json.load(fp)
            load_predictions_from_json_io_dict(forecast2, json_io_dict_in,
                                               False)
            cache_forecast_metadata(
                forecast2
            )  # required by _forecast_ids_to_present_unit_or_target_id_sets()

        # rows now reflect only the newest forecast (forecast2 at time_zero2)
        exp_rows = [(forecast_model, str(time_zero2.timezero_date),
                     forecast2.id, 'Season peak week', 1),
                    (forecast_model, str(time_zero2.timezero_date),
                     forecast2.id, 'above baseline', 1),
                    (forecast_model, str(time_zero2.timezero_date),
                     forecast2.id, 'cases next week', 1),
                    (forecast_model, str(time_zero2.timezero_date),
                     forecast2.id, 'pct next week', 1),
                    (forecast_model, str(time_zero2.timezero_date),
                     forecast2.id, 'season severity', 1)]
        act_rows = [(row[0], str(row[1]), row[2], row[3], row[4])
                    for row in target_rows_for_project(project)]
        self.assertEqual(sorted(exp_rows, key=lambda _: _[0].id),
                         sorted(act_rows, key=lambda _: _[0].id))

        # case: add a second model with only forecasts for one target
        forecast_model2 = ForecastModel.objects.create(
            project=project,
            name=forecast_model.name + '2',
            abbreviation=forecast_model.abbreviation + '2')
        time_zero3 = TimeZero.objects.create(project=project,
                                             timezero_date=datetime.date(
                                                 2011, 10, 4))
        forecast3 = Forecast.objects.create(forecast_model=forecast_model2,
                                            source='docs-predictions.json',
                                            time_zero=time_zero3,
                                            notes="a small prediction file")
        # minimal prediction dict covering a single unit/target pair
        json_io_dict = {
            "meta": {},
            "predictions": [{
                "unit": "location1",
                "target": "pct next week",
                "class": "point",
                "prediction": {
                    "value": 2.1
                }
            }]
        }
        load_predictions_from_json_io_dict(forecast3, json_io_dict, False)
        cache_forecast_metadata(
            forecast3
        )  # required by _forecast_ids_to_present_unit_or_target_id_sets()

        # model2 contributes exactly one extra row - for its single target
        exp_rows = exp_rows + [(forecast_model2, str(
            time_zero3.timezero_date), forecast3.id, 'pct next week', 1)]
        act_rows = [(row[0], str(row[1]), row[2], row[3], row[4])
                    for row in target_rows_for_project(project)]
        self.assertEqual(sorted(exp_rows, key=lambda _: _[0].id),
                         sorted(act_rows, key=lambda _: _[0].id))

        # case: no forecasts
        forecast.delete()
        forecast2.delete()
        forecast3.delete()
        # with no forecasts, each model yields one placeholder row with empty fields and a zero count
        exp_rows = [(forecast_model, '', '', '', 0),
                    (forecast_model2, '', '', '', 0)]
        act_rows = [(row[0], str(row[1]), row[2], row[3], row[4])
                    for row in target_rows_for_project(project)]
        self.assertEqual(sorted(exp_rows, key=lambda _: _[0].id),
                         sorted(act_rows, key=lambda _: _[0].id))
Exemplo n.º 11
0
    def test_unit_rows_for_project(self):
        """
        Exercises unit_rows_for_project() across four scenarios: one model with one forecast covering all
        units; a second forecast at a newer timezero; a second model whose only forecast covers a single
        unit (so present/missing unit names are split); and a project with all forecasts deleted - which
        also guards against a previously-observed SQL syntax error for the empty-forecast case.
        """
        _, _, po_user, _, _, _, _, _ = get_or_create_super_po_mo_users(
            is_create_super=True)
        # recall that _make_docs_project() calls cache_forecast_metadata():
        project, time_zero, forecast_model, forecast = _make_docs_project(
            po_user)  # 2011, 10, 2

        # case: one model with one timezero. recall rows:
        # (model, newest_forecast_tz_date, newest_forecast_id,
        #  num_present_unit_names, present_unit_names, missing_unit_names):
        exp_rows = [(forecast_model, str(time_zero.timezero_date), forecast.id,
                     3, '(all)', '')]
        act_rows = [(row[0], str(row[1]), row[2], row[3], row[4], row[5])
                    for row in unit_rows_for_project(project)]
        self.assertEqual(exp_rows, act_rows)

        # case: add a second forecast for a newer timezero
        time_zero2 = TimeZero.objects.create(project=project,
                                             timezero_date=datetime.date(
                                                 2011, 10, 3))
        forecast2 = Forecast.objects.create(forecast_model=forecast_model,
                                            source='docs-predictions.json',
                                            time_zero=time_zero2,
                                            notes="a small prediction file")
        with open(
                'forecast_app/tests/predictions/docs-predictions.json') as fp:
            json_io_dict_in = json.load(fp)
            load_predictions_from_json_io_dict(forecast2, json_io_dict_in,
                                               False)
            cache_forecast_metadata(
                forecast2
            )  # required by _forecast_ids_to_present_unit_or_target_id_sets()

        # only the newest forecast (forecast2 at time_zero2) is reported
        exp_rows = [(forecast_model, str(time_zero2.timezero_date),
                     forecast2.id, 3, '(all)', '')]
        act_rows = [(row[0], str(row[1]), row[2], row[3], row[4], row[5])
                    for row in unit_rows_for_project(project)]
        self.assertEqual(exp_rows, act_rows)

        # case: add a second model with only forecasts for one unit
        forecast_model2 = ForecastModel.objects.create(
            project=project,
            name=forecast_model.name + '2',
            abbreviation=forecast_model.abbreviation + '2')
        time_zero3 = TimeZero.objects.create(project=project,
                                             timezero_date=datetime.date(
                                                 2011, 10, 4))
        forecast3 = Forecast.objects.create(forecast_model=forecast_model2,
                                            source='docs-predictions.json',
                                            time_zero=time_zero3,
                                            notes="a small prediction file")
        # minimal prediction dict covering a single unit (location1) only
        json_io_dict = {
            "meta": {},
            "predictions": [{
                "unit": "location1",
                "target": "pct next week",
                "class": "point",
                "prediction": {
                    "value": 2.1
                }
            }]
        }
        load_predictions_from_json_io_dict(forecast3, json_io_dict, False)
        cache_forecast_metadata(
            forecast3
        )  # required by _forecast_ids_to_present_unit_or_target_id_sets()

        # model2 has one present unit; the other two are reported as missing
        exp_rows = [(forecast_model, str(time_zero2.timezero_date),
                     forecast2.id, 3, '(all)', ''),
                    (forecast_model2, str(time_zero3.timezero_date),
                     forecast3.id, 1, 'location1', 'location2, location3')]
        act_rows = [(row[0], str(row[1]), row[2], row[3], row[4], row[5])
                    for row in unit_rows_for_project(project)]
        self.assertEqual(exp_rows, act_rows)

        # case: exposes bug: syntax error when no forecasts in project:
        #   psycopg2.errors.SyntaxError: syntax error at or near ")"
        #   LINE 6:             WHERE f.id IN ()
        forecast.delete()
        forecast2.delete()
        forecast3.delete()
        # (model, newest_forecast_tz_date, newest_forecast_id, num_present_unit_names, present_unit_names,
        #  missing_unit_names):
        exp_rows = [(forecast_model, 'None', None, 0, '', '(all)'),
                    (forecast_model2, 'None', None, 0, '', '(all)')]
        act_rows = [(row[0], str(row[1]), row[2], row[3], row[4], row[5])
                    for row in unit_rows_for_project(project)]
        self.assertEqual(sorted(exp_rows, key=lambda _: _[0].id),
                         sorted(act_rows, key=lambda _: _[0].id))
    def test_is_forecast_metadata_available(self):
        """The availability flag is falsy before caching and truthy afterward."""
        self.assertFalse(is_forecast_metadata_available(self.forecast))

        cache_forecast_metadata(self.forecast)  # populate the metadata tables
        self.assertTrue(is_forecast_metadata_available(self.forecast))
    def test_calc_interval_20_docs_project_additional_version(self):
        """
        Regression-style test for _calculate_interval_score_values() when a forecast model gains an
        additional forecast *version* at an existing timezero. Builds the docs project, scores two
        truths (2 ScoreValues), adds a forecast at a second timezero (4 ScoreValues), then creates a
        newer version at the first timezero and verifies the count stays at 4 - i.e. the extra version
        must not duplicate interval values (see RuntimeError comment below).
        """
        Score.ensure_all_scores_exist()
        interval_20_score = Score.objects.filter(
            abbreviation='interval_20').first()
        self.assertIsNotNone(interval_20_score)

        _, _, po_user, _, _, _, _, _ = get_or_create_super_po_mo_users(
            is_create_super=True)
        project, time_zero, forecast_model, forecast = _make_docs_project(
            po_user)

        unit_loc2 = project.units.filter(name='location2').first()
        targ_pct_next_wk = project.targets.filter(
            name='pct next week').first()  # continuous
        unit_loc3 = project.units.filter(name='location3').first()
        targ_cases_next_wk = project.targets.filter(
            name='cases next week').first()  # discrete

        # add two truths that result in two ScoreValues
        project.delete_truth_data()
        TruthData.objects.create(time_zero=time_zero,
                                 unit=unit_loc2,
                                 target=targ_pct_next_wk,
                                 value_f=2.2)  # 2/7)
        TruthData.objects.create(time_zero=time_zero,
                                 unit=unit_loc3,
                                 target=targ_cases_next_wk,
                                 value_i=50)  # 6/7
        ScoreValue.objects \
            .filter(score=interval_20_score, forecast__forecast_model=forecast_model) \
            .delete()  # usually done by update_score_for_model()
        _calculate_interval_score_values(interval_20_score, forecast_model,
                                         0.5)
        self.assertEqual(2, interval_20_score.values.count())
        self.assertEqual([2.8, 50],
                         sorted(interval_20_score.values.all().values_list(
                             'value', flat=True)))

        # add a second forecast for a newer timezero
        time_zero2 = TimeZero.objects.create(project=project,
                                             timezero_date=datetime.date(
                                                 2011, 10, 3))
        forecast2 = Forecast.objects.create(forecast_model=forecast_model,
                                            source='docs-predictions.json',
                                            time_zero=time_zero2,
                                            notes="a small prediction file")
        with open(
                'forecast_app/tests/predictions/docs-predictions.json') as fp:
            json_io_dict_in = json.load(fp)
            load_predictions_from_json_io_dict(forecast2, json_io_dict_in,
                                               False)
        TruthData.objects.create(time_zero=time_zero2,
                                 unit=unit_loc2,
                                 target=targ_pct_next_wk,
                                 value_f=2.2)  # 2/7)
        TruthData.objects.create(time_zero=time_zero2,
                                 unit=unit_loc3,
                                 target=targ_cases_next_wk,
                                 value_i=50)  # 6/7
        ScoreValue.objects \
            .filter(score=interval_20_score, forecast__forecast_model=forecast_model) \
            .delete()  # usually done by update_score_for_model()
        _calculate_interval_score_values(interval_20_score, forecast_model,
                                         0.5)
        self.assertEqual(4, interval_20_score.values.count())

        # finally, add a new version to timezero
        # NOTE(review): back-dating issue_date appears to make the existing forecasts older versions so the
        # new Forecast below becomes the latest version at time_zero - confirm against Forecast versioning rules
        forecast.issue_date = forecast.time_zero.timezero_date
        forecast.save()

        forecast2.issue_date = forecast2.time_zero.timezero_date
        forecast2.save()

        forecast2 = Forecast.objects.create(forecast_model=forecast_model,
                                            source='f2',
                                            time_zero=time_zero)
        with open(
                'forecast_app/tests/predictions/docs-predictions.json') as fp:
            json_io_dict_in = json.load(fp)
            load_predictions_from_json_io_dict(forecast2, json_io_dict_in,
                                               False)  # atomic
            cache_forecast_metadata(forecast2)  # atomic

        # s/b no change from previous
        ScoreValue.objects \
            .filter(score=interval_20_score, forecast__forecast_model=forecast_model) \
            .delete()  # usually done by update_score_for_model()

        # RuntimeError: >2 lower_upper_interval_values: [2.2, 2.2, 5.0, 5.0]. timezero_id=4, unit_id=5, target_id=6
        _calculate_interval_score_values(interval_20_score, forecast_model,
                                         0.5)

        self.assertEqual(4, interval_20_score.values.count())