Example No. 1
    def test_county_cases(self):
        # test blue sky
        with open('tests/county-examples/correct.csv') as quantile_fp:
            _, error_messages = json_io_dict_from_quantile_csv_file(
                quantile_fp, COVID_TARGETS, covid19_row_validator,
                COVID_ADDL_REQ_COLS)
        self.assertEqual(0, len(error_messages))

        # test invalid combinations
        file_exp_num_errors_messages = [
            ('invalid-inc-hosp-target-for-county.csv', 8,
             'invalid location for target'),
            ('invalid-quantiles-for-case-target.csv', 16,
             'invalid quantile for target'),
            ('invalid-wk-cum-death-target-for-county.csv', 8,
             'invalid location for target'),
            ('invalid-wk-inc-death-target-for-county.csv', 8,
             'invalid location for target'),
        ]
        for quantile_file, exp_num_errors, exp_message in file_exp_num_errors_messages:
            with open('tests/county-examples/' + quantile_file) as quantile_fp:
                _, error_messages = json_io_dict_from_quantile_csv_file(
                    quantile_fp, COVID_TARGETS, covid19_row_validator,
                    COVID_ADDL_REQ_COLS)
                self.assertEqual(exp_num_errors, len(error_messages))
                self.assertEqual(MESSAGE_FORECAST_CHECKS, error_messages[0][0])
                # arbitrarily pick the first message; all are similar
                self.assertIn(exp_message, error_messages[0][1])
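As the assertions above show, each element of error_messages is a (priority, message) 2-tuple whose priority is one of the MESSAGE_* constants (e.g. MESSAGE_FORECAST_CHECKS). A minimal sketch of unpacking them, illustrative only:

    # error_messages: [(priority, message), ...], e.g.
    # (MESSAGE_FORECAST_CHECKS, 'invalid location for target ...')
    for priority, message in error_messages:
        print(priority, message)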
Example No. 2
 def test_json_io_dict_from_quantile_csv_file_calls_validate_header(self):
     column_index_dict = {
         'target': 0,
         'location': 1,
         'type': 2,
         'quantile': 3,
         'value': 4
     }
     with patch('zoltpy.quantile_io._validate_header', return_value=(column_index_dict, None)) as mock, \
             open('tests/quantile-predictions.csv') as quantile_fp:
         json_io_dict_from_quantile_csv_file(
             quantile_fp, ['1 wk ahead cum death', '1 day ahead inc hosp'])
         self.assertEqual(1, mock.call_count)
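The same patching pattern can also assert how `_validate_header` was called, not just how often. A small extension of the test above (a sketch only, reusing the example's own names and patch target):

    # Sketch: same patch target and column_index_dict as the example above.
    with patch('zoltpy.quantile_io._validate_header',
               return_value=(column_index_dict, None)) as mock, \
            open('tests/quantile-predictions.csv') as quantile_fp:
        json_io_dict_from_quantile_csv_file(
            quantile_fp, ['1 wk ahead cum death', '1 day ahead inc hosp'])
        mock.assert_called_once()  # equivalent to checking call_count == 1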
Example No. 3
    def test_json_io_dict_from_point_csv_file_bad_values(self):
        # test various bad point values, quantile values, and quantiles
        file_exp_num_errors_messages = [
            ('quantile-predictions-empty-point.csv', 1,
             'entries in the `value` column must be an int or float'),
            ('quantile-predictions-nan-point.csv', 1,
             'entries in the `value` column must be an int or float'),
            ('quantile-predictions-inf-point.csv', 1,
             'entries in the `value` column must be an int or float'),
            ('quantile-predictions-empty-quantile-value.csv', 2,
             'entries in the `value` column must be an int or float'),
            ('quantile-predictions-inf-quantile-value.csv', 1,
             'entries in the `value` column must be an int or float'),
            ('quantile-predictions-nan-quantile-value.csv', 2,
             'entries in the `value` column must be an int or float'),
            ('quantile-predictions-nan-quantile.csv', 1,
             'entries in the `quantile` column must be an int or float in [0, 1]'
             ),
        ]
        for quantile_file, exp_num_errors, exp_message in file_exp_num_errors_messages:
            with open('tests/bad-values/' + quantile_file) as quantile_fp:
                _, error_messages = json_io_dict_from_quantile_csv_file(
                    quantile_fp,
                    ['1 wk ahead cum death', '1 day ahead inc hosp'])
                self.assertEqual(exp_num_errors, len(error_messages))
                self.assertEqual(MESSAGE_FORECAST_CHECKS, error_messages[0][0])

                # note: for those with 2 errors, the 2nd one (MESSAGE_QUANTILES_AND_VALUES) is checked elsewhere
                self.assertIn(exp_message, error_messages[0][1])
Example No. 4
 def test_error_messages_actual_file_with_errors(self):
     file_exp_num_errors_message_priority_messages = [
         ('2020-04-12-IHME-CurveFit.csv', 5, MESSAGE_QUANTILES_AND_VALUES, [
             "Entries in `value` must be non-decreasing as quantiles increase"
         ]),
         ('2020-04-15-Geneva-DeterministicGrowth.csv', 9,
          MESSAGE_FORECAST_CHECKS,
          ["invalid location for target", "invalid target name(s)"]),
         ('2020-05-17-CovidActNow-SEIR_CAN.csv', 10,
          MESSAGE_FORECAST_CHECKS,
          ["entries in the `value` column must be non-negative"]),
         ('2020-06-21-USC-SI_kJalpha.csv', 1, MESSAGE_FORECAST_CHECKS,
          ["entries in the `value` column must be non-negative"]),
     ]
     for quantile_file, exp_num_errors, exp_priority, exp_error_messages in \
             file_exp_num_errors_message_priority_messages:
         with open('tests/covid19-data-processed-examples/' +
                   quantile_file) as quantile_fp:
             _, act_error_messages = json_io_dict_from_quantile_csv_file(
                 quantile_fp,
                 COVID_TARGETS,
                 covid19_row_validator,
                 addl_req_cols=COVID_ADDL_REQ_COLS)
             self.assertEqual(exp_num_errors, len(act_error_messages),
                              exp_error_messages)
             for act_priority, act_error_message in act_error_messages:
                 self.assertEqual(exp_priority, act_priority)
                 self.assertTrue(
                     any(exp_error_message in act_error_message
                         for exp_error_message in exp_error_messages))
Example No. 5
 def test_other_ok_quantile_files(self):
     with open('tests/quantiles-CU-60contact.csv') as quantile_fp:
         _, error_messages = json_io_dict_from_quantile_csv_file(
             quantile_fp,
             COVID_TARGETS,
             covid19_row_validator,
             addl_req_cols=COVID_ADDL_REQ_COLS)
         self.assertEqual(0, len(error_messages))
Example No. 6
    def test_json_io_dict_from_quantile_csv_file_nan(self):
        with open('tests/quantile-predictions-nan-point.csv') as quantile_fp:
            _, error_messages = \
                json_io_dict_from_quantile_csv_file(quantile_fp, ['1 wk ahead cum death', '1 day ahead inc hosp'])
            self.assertEqual(1, len(error_messages))
            self.assertEqual(MESSAGE_FORECAST_CHECKS, error_messages[0][0])
            self.assertIn('entries in the `value` column must be an int or float',
                          error_messages[0][1])

        with open(
                'tests/quantile-predictions-nan-quantile.csv') as quantile_fp:
            _, error_messages = \
                json_io_dict_from_quantile_csv_file(quantile_fp, ['1 wk ahead cum death', '1 day ahead inc hosp'])
            self.assertEqual(1, len(error_messages))
            self.assertEqual(MESSAGE_FORECAST_CHECKS, error_messages[0][0])
            self.assertIn(
                'entries in the `quantile` column must be an int or float in [0, 1]',
                error_messages[0][1])
Example No. 7
 def test_json_io_dict_from_quantile_csv_file_no_points(self):
     with open('tests/quantile-predictions-no-point.csv') as quantile_fp:
         _, error_messages = json_io_dict_from_quantile_csv_file(
             quantile_fp, ['1 day ahead inc hosp', '1 wk ahead cum death'])
         self.assertEqual(1, len(error_messages))
         self.assertEqual(MESSAGE_QUANTILES_AS_A_GROUP,
                          error_messages[0][0])
         self.assertIn(
             "There must be exactly one point prediction for each location/target pair",
             error_messages[0][1])
Example No. 8
 def test_json_io_dict_from_quantile_csv_file_small_tolerance(self):
     with open(
             'tests/covid19-data-processed-examples/2020-04-20-YYG-ParamSearch-small.csv'
     ) as quantile_fp:
         _, error_messages = json_io_dict_from_quantile_csv_file(
             quantile_fp,
             COVID_TARGETS,
             covid19_row_validator,
             addl_req_cols=COVID_ADDL_REQ_COLS)
         self.assertEqual(0, len(error_messages))
Example No. 9
 def test_json_io_dict_from_invalid_type_header(self):
     with open(
             'tests/covid19-data-processed-examples/2020-04-20-YYG-invalid-type.csv'
     ) as quantile_fp:
         _, error_messages = json_io_dict_from_quantile_csv_file(
             quantile_fp,
             COVID_TARGETS,
             covid19_row_validator,
             addl_req_cols=COVID_ADDL_REQ_COLS)
         self.assertEqual(1, len(error_messages))
Example No. 10
 def test_json_io_dict_from_quantile_csv_file_bad_row_count(self):
      with open('tests/quantiles-bad-row-count.csv'
                ) as quantile_fp:  # header: 5 columns, row: 4
         _, error_messages = json_io_dict_from_quantile_csv_file(
             quantile_fp, COVID_TARGETS)
         exp_errors = [(
             MESSAGE_FORECAST_CHECKS,
             "invalid number of items in row. len(header)=5 but len(row)=4. "
             "row=['1 wk ahead cum death', 'point', 'NA', '7.74526423651839']"
         )]
         self.assertEqual(exp_errors, error_messages)
Example No. 11
 def test_empty_forecast(self):
     with open('tests/bad-values/quantile-predictions-no-data.csv'
               ) as quantile_fp:
         try:
             _, error_messages = json_io_dict_from_quantile_csv_file(
                 quantile_fp, COVID_TARGETS)
             self.assertEqual(1, len(error_messages))
             self.assertEqual(MESSAGE_FORECAST_CHECKS, error_messages[0][0])
             self.assertIn('no data rows in file', error_messages[0][1])
         except Exception as ex:
             self.fail(f"unexpected exception: {ex}")
Example No. 12
    def test_json_io_dict_from_quantile_csv_file_retractions(self):
        # test valid file with retractions
        with open(
                'tests/retractions/2020-07-04-YYG-ParamSearch-retractions.csv'
        ) as quantile_fp:
            try:
                json_io_dict, error_messages = json_io_dict_from_quantile_csv_file(
                    quantile_fp, COVID_TARGETS, covid19_row_validator,
                    COVID_ADDL_REQ_COLS)
                exp_json_io_dict = [{
                    'unit': 'US',
                    'target': '1 day ahead inc hosp',
                    'class': 'quantile',
                    'prediction': None
                }, {
                    'unit': 'US',
                    'target': '1 day ahead inc hosp',
                    'class': 'point',
                    'prediction': None
                }]
                self.assertEqual([], error_messages)
                self.assertEqual(exp_json_io_dict, json_io_dict['predictions'])
            except Exception as ex:
                self.fail(f"unexpected exception: {ex}")

        # test invalid file with retractions
        with open(
                'tests/retractions/2020-07-04-YYG-ParamSearch-bad-retractions.csv'
        ) as quantile_fp:
            try:
                _, error_messages = json_io_dict_from_quantile_csv_file(
                    quantile_fp, COVID_TARGETS, covid19_row_validator,
                    COVID_ADDL_REQ_COLS)
                self.assertEqual(1, len(error_messages))
                self.assertEqual(MESSAGE_QUANTILES_AND_VALUES,
                                 error_messages[0][0])
                self.assertIn(
                    "Retracted quantile values must all be 'NULL', but only some were",
                    error_messages[0][1])
            except Exception as ex:
                self.fail(f"unexpected exception: {ex}")
Example No. 13
 def test_json_io_dict_from_quantile_csv_file_bad_covid_fips_code(self):
     for csv_file in [
             'quantiles-bad-row-fip-one-digit.csv',
             'quantiles-bad-row-fip-three-digits.csv',
             'quantiles-bad-row-fip-bad-two-digits.csv'
     ]:
         with open('tests/' + csv_file) as quantile_fp:
             _, error_messages = \
                 json_io_dict_from_quantile_csv_file(quantile_fp, COVID_TARGETS, covid19_row_validator,
                                                     COVID_ADDL_REQ_COLS)
         self.assertEqual(1, len(error_messages))
         self.assertEqual(MESSAGE_FORECAST_CHECKS, error_messages[0][0])
         self.assertIn("invalid location for target", error_messages[0][1])
Example No. 14
    def test_covid_validation_date_format(self):
        # test that `covid19_row_validator()` checks these columns are YYYY-MM-DD format: forecast_date, target_end_date

        # ok dates: '2020-04-15-Geneva-DeterministicGrowth.csv'
        test_dir = 'tests/covid19-data-processed-examples/'
        with open(test_dir +
                  '2020-04-15-Geneva-DeterministicGrowth.csv') as quantile_fp:
            try:
                _, error_messages = \
                    json_io_dict_from_quantile_csv_file(quantile_fp, COVID_TARGETS, covid19_row_validator,
                                                        addl_req_cols=COVID_ADDL_REQ_COLS)
            except Exception as ex:
                self.fail(f"unexpected exception: {ex}")

        # bad date: '2020-04-15-Geneva-DeterministicGrowth_bad_forecast_date.csv'
        with open(test_dir +
                  '2020-04-15-Geneva-DeterministicGrowth_bad_forecast_date.csv'
                  ) as quantile_fp:
            _, error_messages = \
                json_io_dict_from_quantile_csv_file(quantile_fp, COVID_TARGETS, covid19_row_validator,
                                                    addl_req_cols=COVID_ADDL_REQ_COLS)
            self.assertEqual(1, len(error_messages))
            self.assertEqual(MESSAGE_FORECAST_CHECKS, error_messages[0][0])
            self.assertIn("invalid forecast_date or target_end_date format",
                          error_messages[0][1])

        # bad date: '2020-04-15-Geneva-DeterministicGrowth_bad_target_end_date.csv'
        with open(
                test_dir +
                '2020-04-15-Geneva-DeterministicGrowth_bad_target_end_date.csv'
        ) as quantile_fp:
            _, error_messages = \
                json_io_dict_from_quantile_csv_file(quantile_fp, COVID_TARGETS, covid19_row_validator,
                                                    addl_req_cols=COVID_ADDL_REQ_COLS)
            self.assertEqual(1, len(error_messages))
            self.assertEqual(MESSAGE_FORECAST_CHECKS, error_messages[0][0])
            self.assertIn("invalid forecast_date or target_end_date format",
                          error_messages[0][1])
Example No. 15
    def test_optional_additional_required_column_names(self):
        # target, location, location_name, type, quantile, value:
        with open('tests/quantile-predictions.csv') as quantile_fp:
            _, error_messages = \
                json_io_dict_from_quantile_csv_file(quantile_fp, ['1 wk ahead cum death', '1 day ahead inc hosp'],
                                                    addl_req_cols=COVID_ADDL_REQ_COLS)
            self.assertEqual(1, len(error_messages))
            self.assertEqual(MESSAGE_FORECAST_CHECKS, error_messages[0][0])
            self.assertIn(
                'invalid header. did not contain the required column(s)',
                error_messages[0][1])

        # forecast_date, target, target_end_date, location, location_name, type, quantile, value:
        with open(
                'tests/covid19-data-processed-examples/2020-04-15-Geneva-DeterministicGrowth.csv'
        ) as quantile_fp:
            try:
                json_io_dict_from_quantile_csv_file(
                    quantile_fp,
                    ['1 wk ahead cum death', '1 day ahead inc hosp'],
                    addl_req_cols=COVID_ADDL_REQ_COLS)
            except Exception as ex:
                self.fail(f"unexpected exception: {ex}")
Example No. 16
 def test_json_io_dict_from_quantile_csv_file_dup_points(self):
     with open('tests/quantiles-duplicate-points.csv') as quantile_fp:
         _, act_error_messages = json_io_dict_from_quantile_csv_file(
             quantile_fp, ['1 day ahead inc hosp'])
         exp_error_messages = [
             (MESSAGE_QUANTILES_AND_VALUES,
              "Within a Prediction, there cannot be more than 1 Prediction Element of the same "
              "class. Found these duplicate unit/target/classes tuples: [('04', '1 day ahead "
              "inc hosp', ['point', 'point'])]"),
             (MESSAGE_QUANTILES_AS_A_GROUP,
              "There must be zero or one point prediction for each location/target pair. Found "
              "these unit, target, point counts tuples did not have exactly one point: [('04', "
              "'1 day ahead inc hosp', 2)]")
         ]
         self.assertEqual(exp_error_messages, act_error_messages)
Example No. 17
 def test_json_io_dict_from_quantile_csv_file_ok(self):
     for quantile_file in [
             'tests/quantile-predictions-5-col.csv',
             'tests/quantile-predictions.csv'
     ]:
         with open(quantile_file) as quantile_fp, \
                 open('tests/quantile-predictions.json') as exp_json_fp:
             exp_json_io_dict = json.load(exp_json_fp)
             act_json_io_dict, _ = json_io_dict_from_quantile_csv_file(
                 quantile_fp,
                 ['1 wk ahead cum death', '1 day ahead inc hosp'])
             exp_json_io_dict['predictions'].sort(
                 key=lambda _: (_['unit'], _['target'], _['class']))
             act_json_io_dict['predictions'].sort(
                 key=lambda _: (_['unit'], _['target'], _['class']))
             self.assertEqual(exp_json_io_dict, act_json_io_dict)
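For reference, each element of the sorted `predictions` lists follows the Zoltar prediction-element shape: unit, target, class, plus a class-specific `prediction` dict (which the retraction example earlier in this section shows as None for retracted rows). An illustrative sketch with invented values:

    # Shape inferred from these tests and the retraction example; values are invented.
    quantile_element = {
        'unit': 'US',
        'target': '1 wk ahead cum death',
        'class': 'quantile',
        'prediction': {'quantile': [0.025, 0.5, 0.975],
                       'value': [10.0, 50.0, 90.0]}
    }
    point_element = {
        'unit': 'US',
        'target': '1 wk ahead cum death',
        'class': 'point',
        'prediction': {'value': 50.0}
    }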
Example No. 18
 def test_error_messages_actual_files_no_errors(self):
     # test large-ish actual files
     ok_quantile_files = [
         # '2020-04-12-IHME-CurveFit.csv',  # errors. tested below
         # '2020-04-15-Geneva-DeterministicGrowth.csv',  # ""
         '2020-04-13-COVIDhub-ensemble.csv',
         '2020-04-12-Imperial-ensemble1.csv',
         '2020-04-13-MOBS_NEU-GLEAM_COVID.csv'
     ]
     for quantile_file in ok_quantile_files:
         with open('tests/covid19-data-processed-examples/' +
                   quantile_file) as quantile_fp:
             _, error_messages = json_io_dict_from_quantile_csv_file(
                 quantile_fp, COVID_TARGETS, covid19_row_validator,
                 COVID_ADDL_REQ_COLS)
             self.assertEqual(0, len(error_messages))
Example No. 19
def upload_forecast(forecast_name):
    path = get_forecast_info(forecast_name)
    db = read_validation_db()

    metadata = metadata_dict_for_file(
        list(Path(path).parent.glob('metadata-*.txt'))[0])
    if f"{metadata['team_abbr']}-{metadata['model_abbr']}" not in [
            m.abbreviation for m in models
    ]:
        create_model(path, metadata)

    time_zero_date = '-'.join(forecast_name.split('-')[:3])

    if time_zero_date not in [
            timezero.timezero_date for timezero in project_obj.timezeros
    ]:
        create_timezero(time_zero_date)

    # print(forecast_name, metadata, time_zero_date)
    if path is not None:
        errors_from_validation = validate_quantile_csv_file(path)
        if errors_from_validation != "no errors":
            print(errors_from_validation)
            return errors_from_validation, True
        with open(path) as fp:
            print('uploading %s' % path)
            checksum = hashlib.md5(fp.read().encode('utf-8')).hexdigest()
            fp.seek(0)
            quantile_json, error_from_transformation = json_io_dict_from_quantile_csv_file(
                fp, COVID_TARGETS, covid19_row_validator, COVID_ADDL_REQ_COLS)

            if len(error_from_transformation) > 0:
                return error_from_transformation, True

            try:
                fr = util.upload_forecast(
                    conn, quantile_json, forecast_name, project_name,
                    f"{metadata['team_abbr']}-{metadata['model_abbr']}",
                    time_zero_date)
                db[forecast_name] = checksum
                write_db(db)
                return None, fr
            except Exception as e:
                # return the error rather than raising, matching the other error paths
                return e, True
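The hash-then-rewind pattern above (read the whole file for an MD5 checksum, then seek(0) so the CSV parser can re-read it) can be factored into a small helper. A minimal sketch; `file_checksum` is a hypothetical name, not a zoltpy function:

    import hashlib

    def file_checksum(fp):
        # Hash the full text contents, then rewind so the caller can re-read the file.
        digest = hashlib.md5(fp.read().encode('utf-8')).hexdigest()
        fp.seek(0)
        return digest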
Example No. 20
    def test_covid19_point_with_nonempty_quantile_validation(self):
        # test whether a point row has an empty quantile column
        with open(
                'tests/covid19-data-processed-examples/covid19-predictions-point-nonempty-quantile.csv'
        ) as quantile_fp:
            try:
                _, error_messages = \
                    json_io_dict_from_quantile_csv_file(quantile_fp, COVID_TARGETS, covid19_row_validator,
                                                        addl_req_cols=COVID_ADDL_REQ_COLS)
                self.assertEqual(1, len(error_messages))
                self.assertEqual(MESSAGE_FORECAST_CHECKS, error_messages[0][0])
                self.assertIn(
                    "entries in the `quantile` column must be empty for `point` entries.",
                    error_messages[0][1])

            except Exception as ex:
                self.fail(f"unexpected exception: {ex}")
Example No. 21
def validate_quantile_csv_file(csv_fp):
    """
    A simple wrapper of `json_io_dict_from_quantile_csv_file()` that tosses the json_io_dict and just prints validation
    error_messages.

    :param csv_fp: as passed to `json_io_dict_from_quantile_csv_file()`
    :return: error_messages: a list of strings
    """
    quantile_csv_file = Path(csv_fp)
    click.echo(f"* validating quantile_csv_file '{quantile_csv_file}'...")
    with open(quantile_csv_file) as cdc_csv_fp:
        # toss json_io_dict:
        _, error_messages = json_io_dict_from_quantile_csv_file(cdc_csv_fp, COVID_TARGETS, covid19_row_validator,
                                                                COVID_ADDL_REQ_COLS)
        if error_messages:
            return summarized_error_messages(error_messages)  # summarizes and orders, converting 2-tuples to strings
        else:
            return "no errors"
Example No. 22
def upload_covid_all_forecasts(path_to_processed_model_forecasts, dir_name):
    global models
    global model_names

    # Get all forecasts in the directory of this model
    forecasts = os.listdir(path_to_processed_model_forecasts)

    # Get model name or create a new model if it's not in the current Zoltar project
    metadata = metadata_dict_for_file(path_to_processed_model_forecasts +
                                      'metadata-' + dir_name + '.txt')
    model_name = metadata['model_name']
    if model_name not in model_names:
        model_config = {
            'name': metadata['model_name'],
            'abbreviation': metadata['model_abbr'],
            'team_name': metadata['team_name'],
            'description': metadata['methods'],
            'home_url': url + dir_name,
            'aux_data_url': 'NA'
        }
        try:
            project_obj.create_model(model_config)
            models = project_obj.models
            model_names = [model.name for model in models]
        except Exception as ex:
            return ex
    model = [model for model in models if model.name == model_name][0]

    # Get names of existing forecasts to avoid re-upload
    existing_forecasts = [forecast.source for forecast in model.forecasts]

    # Batch upload
    json_io_dict_batch = []
    forecast_filename_batch = []
    timezero_date_batch = []

    for forecast in forecasts:

        # Skip if forecast is already on zoltar
        if forecast in existing_forecasts:
            continue

        # Skip metadata text file
        if '.txt' in forecast:
            continue

        with open(path_to_processed_model_forecasts + forecast) as fp:

            # Get timezero and create timezero on zoltar if not existed
            time_zero_date = forecast.split(dir_name)[0][:-1]
            if time_zero_date not in project_timezeros:
                try:
                    project_obj.create_timezero(time_zero_date)
                    project_timezeros.append(time_zero_date)
                except Exception as ex:
                    return ex

            # Validate covid19 file
            errors_from_validation = validate_quantile_csv_file(
                path_to_processed_model_forecasts + forecast)

            # Upload forecast
            if "no errors" == errors_from_validation:
                quantile_json, error_from_transformation = json_io_dict_from_quantile_csv_file(
                    fp, VALID_TARGET_NAMES, covid19_row_validator)
                if len(error_from_transformation) > 0:
                    return error_from_transformation
                else:
                    # try:
                    #     util.upload_forecast(conn, quantile_json, forecast,
                    #                             project_name, model_name , time_zero_date, overwrite=False)
                    # except Exception as ex:
                    #     print(ex)
                    json_io_dict_batch.append(quantile_json)
                    timezero_date_batch.append(time_zero_date)
                    forecast_filename_batch.append(forecast)
            else:
                return errors_from_validation

    # Batch upload for better performance
    if len(json_io_dict_batch) > 0:
        try:
            util.upload_forecast_batch(conn, json_io_dict_batch,
                                       forecast_filename_batch, project_name,
                                       model_name, timezero_date_batch)
        except Exception as ex:
            return ex
    return "Pass"
Example No. 23
def upload_covid_all_forecasts(path_to_processed_model_forecasts):
    # meta info
    project_name = 'COVID-19 Forecasts'
    project_obj = None
    project_timezeros = []
    forecasts = os.listdir(path_to_processed_model_forecasts)
    conn = util.authenticate()

    # Get all existing timezeros in the project
    for project in conn.projects:
        if project.name == project_name:
            project_obj = project
            for timezero in project.timezeros:
                project_timezeros.append(timezero.timezero_date)
            break

    # Get model name
    separator = '-'
    dir_name = separator.join(
        forecasts[0].split(separator)[3:]).split('.csv')[0]
    metadata = metadata_dict_for_file(path_to_processed_model_forecasts +
                                      'metadata-' + dir_name + '.txt')
    model_name = metadata['model_name']
    model = [
        model for model in project_obj.models if model.name == model_name
    ][0]

    # Get names of existing forecasts to avoid re-upload
    existing_forecasts = [forecast.source for forecast in model.forecasts]

    for forecast in forecasts:

        # Skip if forecast is already on zoltar
        if forecast in existing_forecasts:
            continue

        # Skip metadata text file
        if '.txt' in forecast:
            continue

        with open(path_to_processed_model_forecasts + forecast) as fp:

            # Get timezero and create timezero on zoltar if not existed
            time_zero_date = forecast.split(dir_name)[0][:-1]
            if time_zero_date not in project_timezeros:
                try:
                    project_obj.create_timezero(time_zero_date)
                except Exception as ex:
                    print(ex)

            # Validate covid19 file
            errors_from_validation = validate_quantile_csv_file(
                path_to_processed_model_forecasts + forecast)

            # Upload forecast
            if "no errors" == errors_from_validation:
                quantile_json, error_from_transformation = json_io_dict_from_quantile_csv_file(
                    fp, COVID19_TARGET_NAMES, covid19_row_validator)
                if len(error_from_transformation) > 0:
                    print(error_from_transformation)
                else:
                    try:
                        util.upload_forecast(conn,
                                             quantile_json,
                                             forecast,
                                             project_name,
                                             model_name,
                                             time_zero_date,
                                             overwrite=False)
                    except Exception as ex:
                        print(ex)
            else:
                print(errors_from_validation)
Example No. 24
def upload_covid_all_forecasts(path_to_processed_model_forecasts, dir_name):
    global models
    global model_abbrs

    # Get all forecasts in the directory of this model
    forecasts = os.listdir(path_to_processed_model_forecasts)

    # Get model name or create a new model if it's not in the current Zoltar project
    try:
        metadata = metadata_dict_for_file(path_to_processed_model_forecasts +
                                          'metadata-' + dir_name + '.txt')
    except Exception as ex:
        return ex
    model_abbreviation = metadata['model_abbr']

    # get the corresponding model_config for the metadata file
    model_config = zoltar_config_from_metadata(metadata)

    if model_abbreviation not in model_abbrs:
        pprint.pprint('%s not in models' % model_abbreviation)
        if 'home_url' not in model_config:
            model_config['home_url'] = url + dir_name

        try:
            logger.info(f"Creating model {model_config}")
            models.append(project_obj.create_model(model_config))
            model_abbrs = [model.abbreviation for model in models]
        except Exception as ex:
            return ex

    # fetch model based on model_abbr
    model = [
        model for model in models if model.abbreviation == model_abbreviation
    ][0]

    if has_changed(metadata, model):
        # model metadata has changed, call the edit function in zoltpy to update metadata
        print(
            f"{metadata['model_abbr']!r} model has changed metadata contents. Updating on Zoltar..."
        )
        model.edit(model_config)

    # Get names of existing forecasts to avoid re-upload
    existing_time_zeros = [
        forecast.timezero.timezero_date for forecast in model.forecasts
    ]

    # Convert all timezeros from Date type to str type
    existing_time_zeros = [
        existing_time_zero.strftime(YYYY_MM_DD_DATE_FORMAT)
        for existing_time_zero in existing_time_zeros
    ]

    # Batch upload
    json_io_dict_batch = []
    forecast_filename_batch = []
    timezero_date_batch = []

    for forecast in forecasts:

        # Skip metadata text file
        if not forecast.endswith('.csv'):
            continue

        # Default config
        over_write = False
        checksum = 0
        time_zero_date = forecast.split(dir_name)[0][:-1]

        # Check if forecast is already on zoltar
        with open(path_to_processed_model_forecasts + forecast, "rb") as f:
            # Get the current hash of a processed file
            checksum = hashlib.md5(f.read()).hexdigest()

            # Check this hash against the previous version of hash
            if db.get(forecast, None) != checksum:
                print(forecast, db.get(forecast, None))
                if time_zero_date in existing_time_zeros:

                    # Check if the already existing forecast has the same issue date

                    from datetime import date
                    local_issue_date = date.today().strftime("%Y-%m-%d")

                    uploaded_forecast = [
                        forecast for forecast in model.forecasts
                        if forecast.timezero.timezero_date.strftime(
                            YYYY_MM_DD_DATE_FORMAT) == time_zero_date
                    ][0]
                    uploaded_issue_date = uploaded_forecast.issue_date

                    if local_issue_date == uploaded_issue_date:
                        # Overwrite the existing forecast if has the same issue date
                        over_write = True
                        logger.info(
                            f"Overwrite existing forecast={forecast} with newer version because the new issue_date={local_issue_date} is the same as the uploaded file issue_date={uploaded_issue_date}"
                        )
                    else:
                        logger.info(
                            f"Add newer version to forecast={forecast} because the new issue_date={local_issue_date} is different from uploaded file issue_date={uploaded_issue_date}"
                        )

            else:
                continue

        with open(path_to_processed_model_forecasts + forecast) as fp:
            # Create timezero on zoltar if not existed
            if time_zero_date not in project_timezeros:
                try:
                    project_obj.create_timezero(time_zero_date)
                    project_timezeros.append(time_zero_date)
                except Exception as ex:
                    print(ex)
                    return ex

            # Validate covid19 file
            print(f"Validating {forecast}")
            errors_from_validation = validate_quantile_csv_file(
                path_to_processed_model_forecasts + forecast)

            # Upload forecast
            if "no errors" == errors_from_validation:
                quantile_json, error_from_transformation = json_io_dict_from_quantile_csv_file(
                    fp, COVID_TARGETS, covid19_row_validator,
                    COVID_ADDL_REQ_COLS)
                if len(error_from_transformation) > 0:
                    return error_from_transformation
                else:
                    try:
                        logger.debug(
                            'Upload forecast for model: %s \t|\t File: %s\n' %
                            (metadata['model_abbr'], forecast))
                        upload_covid_forecast_by_model(conn,
                                                       quantile_json,
                                                       forecast,
                                                       project_name,
                                                       model,
                                                       metadata['model_abbr'],
                                                       time_zero_date,
                                                       overwrite=over_write)
                        db[forecast] = checksum
                    except Exception as ex:
                        logger.error(ex)
                        return ex
                    json_io_dict_batch.append(quantile_json)
                    timezero_date_batch.append(time_zero_date)
                    forecast_filename_batch.append(forecast)
            else:
                return errors_from_validation

    # # Batch upload for better performance
    # if len(json_io_dict_batch) > 0:
    #     try:
    #         util.upload_forecast_batch(conn, json_io_dict_batch, forecast_filename_batch, project_name, model_name, timezero_date_batch, overwrite = over_write)
    #     except Exception as ex:
    #         return ex
    return "Pass"
Example No. 25
def upload_covid_all_forecasts(path_to_processed_model_forecasts, dir_name):
    global models
    global model_names

    # Get all forecasts in the directory of this model
    forecasts = os.listdir(path_to_processed_model_forecasts)

    # Get model name or create a new model if it's not in the current Zoltar project
    try:
        metadata = metadata_dict_for_file(path_to_processed_model_forecasts+'metadata-'+dir_name+'.txt')
    except Exception as ex:
        return ex 
    model_name = metadata['model_name']
    if model_name not in model_names:
        model_config = {
            'name': metadata['model_name'],
            'abbreviation': metadata['team_abbr'] + '-' + metadata['model_abbr'],
            'team_name': metadata['team_name'],
            'description': metadata['methods'],
            'home_url': url + dir_name,
            'aux_data_url': 'NA'
        }
        try:
            project_obj.create_model(model_config)
            models = project_obj.models
            model_names = [model.name for model in models]
        except Exception as ex:
            return ex  
    model = [model for model in models if model.name == model_name][0]

    # Get names of existing forecasts to avoid re-upload
    existing_forecasts = [forecast.source for forecast in model.forecasts]

    # Batch upload
    json_io_dict_batch = []
    forecast_filename_batch = []
    timezero_date_batch = []

    for forecast in forecasts:
        over_write = False
        checksum = 0
        # Check if forecast is already on zoltar
        with open(path_to_processed_model_forecasts+forecast, "rb") as f:
            # Get the current hash of a processed file
            checksum = hashlib.md5(f.read()).hexdigest()

            # Check this hash against the previous version of hash
            if db.get(forecast, None) != checksum:
                print(forecast)
                if forecast in existing_forecasts:
                    over_write = True
            else:
                continue

        # Skip metadata text file
        if '.txt' in forecast:
            continue

        with open(path_to_processed_model_forecasts+forecast) as fp:

            # Get timezero and create timezero on zoltar if not existed
            time_zero_date = forecast.split(dir_name)[0][:-1]
            # if time_zero_date != "2020-05-25":
            #     continue
            if time_zero_date not in project_timezeros:
                try:
                    project_obj.create_timezero(time_zero_date)
                    project_timezeros.append(time_zero_date)
                except Exception as ex:
                    return ex

            # Validate covid19 file
            errors_from_validation = validate_quantile_csv_file(path_to_processed_model_forecasts+forecast)

            # Upload forecast
            if "no errors" == errors_from_validation:
                quantile_json, error_from_transformation = json_io_dict_from_quantile_csv_file(fp, VALID_TARGET_NAMES, covid19_row_validator)
                if len(error_from_transformation) > 0:
                    return error_from_transformation
                else:
                    try:
                        util.upload_forecast(conn, quantile_json, forecast,
                                             project_name, model_name, time_zero_date,
                                             overwrite=over_write)
                        db[forecast] = checksum
                    except Exception as ex:
                        print(ex)
                        return ex
                    json_io_dict_batch.append(quantile_json)
                    timezero_date_batch.append(time_zero_date)
                    forecast_filename_batch.append(forecast)
            else:
                return errors_from_validation
    
    # # Batch upload for better performance
    # if len(json_io_dict_batch) > 0:
    #     try:
    #         util.upload_forecast_batch(conn, json_io_dict_batch, forecast_filename_batch, project_name, model_name, timezero_date_batch, overwrite = over_write)
    #     except Exception as ex:
    #         return ex
    return "Pass"
Example No. 26
def upload_covid_all_forecasts(path_to_processed_model_forecasts, dir_name):
    global models
    global model_names

    # Get all forecasts in the directory of this model
    forecasts = os.listdir(path_to_processed_model_forecasts)
    conn.re_authenticate_if_necessary()
    # Get model name or create a new model if it's not in the current Zoltar project
    try:
        metadata = metadata_dict_for_file(path_to_processed_model_forecasts +
                                          'metadata-' + dir_name + '.txt')
    except Exception as ex:
        return ex
    model_name = metadata['model_name']
    if model_name not in model_names:
        model_config = {
            'name': metadata['model_name'],
            'abbreviation': metadata['team_abbr'] + '-' + metadata['model_abbr'],
            'team_name': metadata['team_name'],
            'description': metadata['methods'],
            'home_url': metadata['website_url'] if metadata.get('website_url') is not None else url + dir_name,
            'aux_data_url': 'NA'
        }
        try:
            print('Create model %s' % model_name)
            project_obj.create_model(model_config)
            models = project_obj.models
            model_names = [model.name for model in models]
        except Exception as ex:
            return ex
    print('Time: %s \t Model: %s' % (datetime.now(), model_name))
    model = [model for model in models if model.name == model_name][0]

    # Get names of existing forecasts to avoid re-upload
    existing_time_zeros = [
        forecast.timezero.timezero_date for forecast in model.forecasts
    ]

    # Batch upload
    json_io_dict_batch = []
    forecast_filename_batch = []
    timezero_date_batch = []

    for forecast in forecasts:

        # Default config
        over_write = False
        checksum = 0
        time_zero_date = forecast.split(dir_name)[0][:-1]

        # Check if forecast is already on zoltar
        with open(path_to_processed_model_forecasts + forecast, "rb") as f:
            # Get the current hash of a processed file
            checksum = hashlib.md5(f.read()).hexdigest()

            # Check this hash against the previous version of hash
            # if db.get(forecast, None) != checksum:
            #     print(forecast, db.get(forecast, None))
            #     if time_zero_date in existing_time_zeros:
            #         over_write = True
            # else:
            #     continue

            # if timezero existing, then don't write again
            if time_zero_date in existing_time_zeros:
                #update checksum
                # db[forecast] = checksum
                continue

        # Skip metadata text file
        if '.txt' in forecast:
            continue

        with open(path_to_processed_model_forecasts + forecast) as fp:
            # Create timezero on zoltar if not existed
            if time_zero_date not in project_timezeros:
                try:
                    project_obj.create_timezero(time_zero_date)
                    project_timezeros.append(time_zero_date)
                except Exception as ex:
                    return ex

            # Validate covid19 file
            errors_from_validation = validate_quantile_csv_file(
                path_to_processed_model_forecasts + forecast)

            # Upload forecast
            if "no errors" == errors_from_validation:
                quantile_json, error_from_transformation = json_io_dict_from_quantile_csv_file(
                    fp, COVID_TARGETS, covid19_row_validator,
                    COVID_ADDL_REQ_COLS)
                if len(error_from_transformation) > 0:
                    return error_from_transformation
                else:
                    try:
                        print('Upload forecast for model: %s \t|\t File: %s' %
                              (model_name, forecast))
                        print()
                        util.upload_forecast(conn,
                                             quantile_json,
                                             forecast,
                                             project_name,
                                             model_name,
                                             time_zero_date,
                                             overwrite=over_write)
                        db[forecast] = checksum
                    except Exception as ex:
                        print(ex)
                        return ex
                    json_io_dict_batch.append(quantile_json)
                    timezero_date_batch.append(time_zero_date)
                    forecast_filename_batch.append(forecast)
            else:
                return errors_from_validation

    # # Batch upload for better performance
    # if len(json_io_dict_batch) > 0:
    #     try:
    #         util.upload_forecast_batch(conn, json_io_dict_batch, forecast_filename_batch, project_name, model_name, timezero_date_batch, overwrite = over_write)
    #     except Exception as ex:
    #         return ex
    return "Pass"
Example No. 27
def zoltar_connection_app():
    """
    Application demonstrating use of the library at the ZoltarConnection level (rather than using the package's
    higher-level functions such as delete_forecast(), etc.)

    - App args: None
    - Required environment variables:
      - 'Z_HOST': Zoltar host to connect to. typically "https://www.zoltardata.com"
      - 'Z_USERNAME': username of an account with permission to access the resources above
      - 'Z_PASSWORD': password for that account
    """
    host = os.environ.get('Z_HOST')
    username = os.environ.get('Z_USERNAME')
    password = os.environ.get('Z_PASSWORD')

    #
    # try out non-destructive functions
    #

    # work with a connection
    conn = ZoltarConnection(host)
    conn.authenticate(username, password)
    print('\n* projects')
    for project in conn.projects:
        print(f'- {project}, {project.id}, {project.name}')

    # work with a project
    project = [
        project for project in conn.projects
        if project.name == 'Docs Example Project'
    ][0]
    print(f'\n* working with {project}')
    print(f"- objects in {project}:\n"
          f"  = units: {project.units}\n"
          f"  = targets: {project.targets}\n"
          f"  = timezeros: {project.timezeros}\n"
          f"  = models: {project.models}")

    # get the project's truth detail
    print(f'\n* truth for {project}')
    print(
        f'- source, created_at: {project.truth_source}, {project.truth_created_at}'
    )

    # get the project's latest forecasts
    print(f'\n* latest forecasts for {project}')
    print(f'- latest forecasts: {project.latest_forecasts}')

    # work with a model
    model = [
        model for model in project.models
        if model.name == 'docs forecast model'
    ][0]
    print(f'\n* working with {model}')
    print(f'- forecasts: {model.forecasts}')

    # work with a forecast
    forecast = model.forecasts[0]
    print(f'\n* working with {forecast}')

    forecast_data = forecast.data()
    print(f"- data: {len(forecast_data['predictions'])} predictions"
          )  # 26 predictions

    # work with a cdc csv file
    cdc_csv_file = "tests/EW01-2011-ReichLab_kde_US_National.csv"
    print(f'\n* working with a cdc csv file: {cdc_csv_file}')
    with open(cdc_csv_file) as fp:
        json_io_dict = json_io_dict_from_cdc_csv_file(2011, fp)
    print(
        f"- converted cdc data to json: {len(json_io_dict['predictions'])} predictions"
    )  # 154 predictions

    # work with a quantile csv file
    quantile_csv_file = "tests/quantile-predictions.csv"
    print(f'\n* working with a quantile csv file: {quantile_csv_file}')
    with open(quantile_csv_file) as fp:
        json_io_dict, error_messages = \
            json_io_dict_from_quantile_csv_file(fp, ['1 wk ahead cum death', '1 day ahead inc hosp'])
    print(
        f"- converted quantile data to json: {len(json_io_dict['predictions'])} predictions"
    )  # 5 predictions

    # convert to a Pandas DataFrame
    print(f'\n* working with a pandas data frame')
    dataframe = dataframe_from_json_io_dict(forecast_data)
    print(f'- dataframe: {dataframe}')

    # query forecast data
    print(f"\n* querying forecast data")
    query = {
        'targets': ['pct next week', 'cases next week'],
        'types': ['point']
    }
    job = project.submit_query(QueryType.FORECASTS, query)
    busy_poll_job(job)  # does refresh()
    rows = job.download_data()
    print(f"- got {len(rows)} forecast rows. as a dataframe:")
    print(dataframe_from_rows(rows))

    # query truth data
    print(f"\n* querying truth data")
    query = {'targets': ['pct next week', 'cases next week']}
    job = project.submit_query(QueryType.TRUTH, query)
    busy_poll_job(job)  # does refresh()
    rows = job.download_data()
    print(f"- got {len(rows)} truth rows. as a dataframe:")
    print(dataframe_from_rows(rows))
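    # Per the Zoltar query documentation, forecast queries can also filter on
    # 'models', 'units', and 'timezeros' (stated here as an assumption; the
    # values below are invented). A more selective query might look like:
    #
    #   query = {'models': ['docs forecast model'], 'units': ['location1'],
    #            'targets': ['pct next week'], 'timezeros': ['2011-10-02'],
    #            'types': ['point']}
    #   job = project.submit_query(QueryType.FORECASTS, query)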

    #
    # try out destructive functions
    #

    # create a sandbox project to play with, deleting the existing one if any: docs-project.json
    project = [
        project for project in conn.projects if project.name == 'My project'
    ]
    project = project[0] if project else None
    if project:
        print(f"\n* deleting project {project}")
        project.delete()
        print("- deleted project")

    print(f"\n* creating project")
    project = create_project(
        conn, "examples/docs-project.json")  # "name": "My project"
    print(f"- created project: {project}")

    # upload truth
    print(f"\n* uploading truth")
    with open('tests/docs-ground-truth.csv') as csv_fp:
        job = project.upload_truth_data(csv_fp)
    busy_poll_job(job)
    print(f"- upload truth done")

    # create a model, upload a forecast, query the project, then delete it
    print(f"\n* creating model")
    with open("examples/example-model-config.json") as fp:
        model = project.create_model(json.load(fp))
    print(f"- created model: {model}")

    print(f"\n* uploading forecast. pre-upload forecasts: {model.forecasts}")
    with open("examples/docs-predictions.json") as fp:
        json_io_dict = json.load(fp)
        job = model.upload_forecast(json_io_dict, "docs-predictions.json",
                                    "2011-10-02", "some predictions")
    busy_poll_job(job)
    new_forecast = job.created_forecast()
    print(f"- uploaded forecast: {new_forecast}")

    model.refresh()
    print(f'\n* post-upload forecasts: {model.forecasts}')

    print(f"\n* deleting forecast: {new_forecast}")
    job = new_forecast.delete()
    busy_poll_job(job)
    print(f"- deleting forecast: done")

    # clean up by deleting the sandbox project. NB: This will delete all of the data associated with the project without
    # warning, including models and forecasts
    print(f"\n* deleting project {project}")
    project.delete()
    print("- deleted project")

    print("\n* app done!")
Example No. 28
 def test_json_io_dict_from_quantile_csv_file_no_points(self):
     with open('tests/quantile-predictions-no-point.csv') as quantile_fp:
         _, error_messages = json_io_dict_from_quantile_csv_file(
             quantile_fp, ['1 day ahead inc hosp', '1 wk ahead cum death'])
         self.assertEqual(0, len(error_messages))