def test_county_cases(self):
    # test blue sky
    with open('tests/county-examples/correct.csv') as quantile_fp:
        _, error_messages = json_io_dict_from_quantile_csv_file(
            quantile_fp, COVID_TARGETS, covid19_row_validator, COVID_ADDL_REQ_COLS)
        self.assertEqual(0, len(error_messages))

    # test invalid combinations
    file_exp_num_errors_messages = [
        ('invalid-inc-hosp-target-for-county.csv', 8, 'invalid location for target'),
        ('invalid-quantiles-for-case-target.csv', 16, 'invalid quantile for target'),
        ('invalid-wk-cum-death-target-for-county.csv', 8, 'invalid location for target'),
        ('invalid-wk-inc-death-target-for-county.csv', 8, 'invalid location for target'),
    ]
    for quantile_file, exp_num_errors, exp_message in file_exp_num_errors_messages:
        with open('tests/county-examples/' + quantile_file) as quantile_fp:
            _, error_messages = json_io_dict_from_quantile_csv_file(
                quantile_fp, COVID_TARGETS, covid19_row_validator, COVID_ADDL_REQ_COLS)
            self.assertEqual(exp_num_errors, len(error_messages))
            self.assertEqual(MESSAGE_FORECAST_CHECKS, error_messages[0][0])
            self.assertIn(exp_message, error_messages[0][1])  # arbitrarily pick first message. all are similar
def test_json_io_dict_from_quantile_csv_file_calls_validate_header(self):
    column_index_dict = {'target': 0, 'location': 1, 'type': 2, 'quantile': 3, 'value': 4}
    with patch('zoltpy.quantile_io._validate_header', return_value=(column_index_dict, None)) as mock, \
            open('tests/quantile-predictions.csv') as quantile_fp:
        json_io_dict_from_quantile_csv_file(quantile_fp, ['1 wk ahead cum death', '1 day ahead inc hosp'])
        self.assertEqual(1, mock.call_count)
def test_json_io_dict_from_point_csv_file_bad_values(self):
    # test various bad point values, quantile values, and quantiles
    file_exp_num_errors_messages = [
        ('quantile-predictions-empty-point.csv', 1, 'entries in the `value` column must be an int or float'),
        ('quantile-predictions-nan-point.csv', 1, 'entries in the `value` column must be an int or float'),
        ('quantile-predictions-inf-point.csv', 1, 'entries in the `value` column must be an int or float'),
        ('quantile-predictions-empty-quantile-value.csv', 2, 'entries in the `value` column must be an int or float'),
        ('quantile-predictions-inf-quantile-value.csv', 1, 'entries in the `value` column must be an int or float'),
        ('quantile-predictions-nan-quantile-value.csv', 2, 'entries in the `value` column must be an int or float'),
        ('quantile-predictions-nan-quantile.csv', 1, 'entries in the `quantile` column must be an int or float in [0, 1]'),
    ]
    for quantile_file, exp_num_errors, exp_message in file_exp_num_errors_messages:
        with open('tests/bad-values/' + quantile_file) as quantile_fp:
            _, error_messages = json_io_dict_from_quantile_csv_file(
                quantile_fp, ['1 wk ahead cum death', '1 day ahead inc hosp'])
            self.assertEqual(exp_num_errors, len(error_messages))
            self.assertEqual(MESSAGE_FORECAST_CHECKS, error_messages[0][0])
            # note: for those with 2 errors, the 2nd one (MESSAGE_QUANTILES_AND_VALUES) is checked elsewhere
            self.assertIn(exp_message, error_messages[0][1])
def test_error_messages_actual_file_with_errors(self):
    file_exp_num_errors_message_priority_messages = [
        ('2020-04-12-IHME-CurveFit.csv', 5, MESSAGE_QUANTILES_AND_VALUES,
         ["Entries in `value` must be non-decreasing as quantiles increase"]),
        ('2020-04-15-Geneva-DeterministicGrowth.csv', 9, MESSAGE_FORECAST_CHECKS,
         ["invalid location for target", "invalid target name(s)"]),
        ('2020-05-17-CovidActNow-SEIR_CAN.csv', 10, MESSAGE_FORECAST_CHECKS,
         ["entries in the `value` column must be non-negative"]),
        ('2020-06-21-USC-SI_kJalpha.csv', 1, MESSAGE_FORECAST_CHECKS,
         ["entries in the `value` column must be non-negative"]),
    ]
    for quantile_file, exp_num_errors, exp_priority, exp_error_messages in \
            file_exp_num_errors_message_priority_messages:
        with open('tests/covid19-data-processed-examples/' + quantile_file) as quantile_fp:
            _, act_error_messages = json_io_dict_from_quantile_csv_file(
                quantile_fp, COVID_TARGETS, covid19_row_validator, addl_req_cols=COVID_ADDL_REQ_COLS)
            self.assertEqual(exp_num_errors, len(act_error_messages), exp_error_messages)
            for act_priority, act_error_message in act_error_messages:
                self.assertEqual(exp_priority, act_priority)
                self.assertTrue(any([exp_error_message in act_error_message
                                     for exp_error_message in exp_error_messages]))
def test_other_ok_quantile_files(self):
    with open('tests/quantiles-CU-60contact.csv') as quantile_fp:
        _, error_messages = json_io_dict_from_quantile_csv_file(
            quantile_fp, COVID_TARGETS, covid19_row_validator, addl_req_cols=COVID_ADDL_REQ_COLS)
        self.assertEqual(0, len(error_messages))
def test_json_io_dict_from_quantile_csv_file_nan(self):
    with open('tests/quantile-predictions-nan-point.csv') as quantile_fp:
        _, error_messages = \
            json_io_dict_from_quantile_csv_file(quantile_fp, ['1 wk ahead cum death', '1 day ahead inc hosp'])
        self.assertEqual(1, len(error_messages))
        self.assertEqual(MESSAGE_FORECAST_CHECKS, error_messages[0][0])
        self.assertIn('entries in the `value` column must be an int or float', error_messages[0][1])

    with open('tests/quantile-predictions-nan-quantile.csv') as quantile_fp:
        _, error_messages = \
            json_io_dict_from_quantile_csv_file(quantile_fp, ['1 wk ahead cum death', '1 day ahead inc hosp'])
        self.assertEqual(1, len(error_messages))
        self.assertEqual(MESSAGE_FORECAST_CHECKS, error_messages[0][0])
        self.assertIn('entries in the `quantile` column must be an int or float in [0, 1]', error_messages[0][1])
def test_json_io_dict_from_quantile_csv_file_no_points(self):
    with open('tests/quantile-predictions-no-point.csv') as quantile_fp:
        _, error_messages = json_io_dict_from_quantile_csv_file(
            quantile_fp, ['1 day ahead inc hosp', '1 wk ahead cum death'])
        self.assertEqual(1, len(error_messages))
        self.assertEqual(MESSAGE_QUANTILES_AS_A_GROUP, error_messages[0][0])
        self.assertIn("There must be exactly one point prediction for each location/target pair",
                      error_messages[0][1])
def test_json_io_dict_from_quantile_csv_file_small_tolerance(self):
    with open('tests/covid19-data-processed-examples/2020-04-20-YYG-ParamSearch-small.csv') as quantile_fp:
        _, error_messages = json_io_dict_from_quantile_csv_file(
            quantile_fp, COVID_TARGETS, covid19_row_validator, addl_req_cols=COVID_ADDL_REQ_COLS)
        self.assertEqual(0, len(error_messages))
def test_json_io_dict_from_invalid_type_header(self):
    with open('tests/covid19-data-processed-examples/2020-04-20-YYG-invalid-type.csv') as quantile_fp:
        _, error_messages = json_io_dict_from_quantile_csv_file(
            quantile_fp, COVID_TARGETS, covid19_row_validator, addl_req_cols=COVID_ADDL_REQ_COLS)
        self.assertEqual(1, len(error_messages))
def test_json_io_dict_from_quantile_csv_file_bad_row_count(self):
    with open('tests/quantiles-bad-row-count.csv') as quantile_fp:  # data row has one fewer column than the header
        _, error_messages = json_io_dict_from_quantile_csv_file(quantile_fp, COVID_TARGETS)
        exp_errors = [(MESSAGE_FORECAST_CHECKS,
                       "invalid number of items in row. len(header)=5 but len(row)=4. "
                       "row=['1 wk ahead cum death', 'point', 'NA', '7.74526423651839']")]
        self.assertEqual(exp_errors, error_messages)
def test_empty_forecast(self):
    with open('tests/bad-values/quantile-predictions-no-data.csv') as quantile_fp:
        try:
            _, error_messages = json_io_dict_from_quantile_csv_file(quantile_fp, COVID_TARGETS)
            self.assertEqual(1, len(error_messages))
            self.assertEqual(MESSAGE_FORECAST_CHECKS, error_messages[0][0])
            self.assertIn('no data rows in file', error_messages[0][1])
        except Exception as ex:
            self.fail(f"unexpected exception: {ex}")
def test_json_io_dict_from_quantile_csv_file_retractions(self):
    # test valid file with retractions
    with open('tests/retractions/2020-07-04-YYG-ParamSearch-retractions.csv') as quantile_fp:
        try:
            json_io_dict, error_messages = json_io_dict_from_quantile_csv_file(
                quantile_fp, COVID_TARGETS, covid19_row_validator, COVID_ADDL_REQ_COLS)
            exp_json_io_dict = [
                {'unit': 'US', 'target': '1 day ahead inc hosp', 'class': 'quantile', 'prediction': None},
                {'unit': 'US', 'target': '1 day ahead inc hosp', 'class': 'point', 'prediction': None}]
            self.assertEqual([], error_messages)
            self.assertEqual(exp_json_io_dict, json_io_dict['predictions'])
        except Exception as ex:
            self.fail(f"unexpected exception: {ex}")

    # test invalid file with retractions
    with open('tests/retractions/2020-07-04-YYG-ParamSearch-bad-retractions.csv') as quantile_fp:
        try:
            _, error_messages = json_io_dict_from_quantile_csv_file(
                quantile_fp, COVID_TARGETS, covid19_row_validator, COVID_ADDL_REQ_COLS)
            self.assertEqual(1, len(error_messages))
            self.assertEqual(MESSAGE_QUANTILES_AND_VALUES, error_messages[0][0])
            self.assertIn("Retracted quantile values must all be 'NULL', but only some were",
                          error_messages[0][1])
        except Exception as ex:
            self.fail(f"unexpected exception: {ex}")
def test_json_io_dict_from_quantile_csv_file_bad_covid_fips_code(self):
    for csv_file in ['quantiles-bad-row-fip-one-digit.csv', 'quantiles-bad-row-fip-three-digits.csv',
                     'quantiles-bad-row-fip-bad-two-digits.csv']:
        with open('tests/' + csv_file) as quantile_fp:
            _, error_messages = \
                json_io_dict_from_quantile_csv_file(quantile_fp, COVID_TARGETS, covid19_row_validator,
                                                    COVID_ADDL_REQ_COLS)
            self.assertEqual(1, len(error_messages))
            self.assertEqual(MESSAGE_FORECAST_CHECKS, error_messages[0][0])
            self.assertIn("invalid location for target", error_messages[0][1])
def test_covid_validation_date_format(self):
    # test that `covid19_row_validator()` checks these columns are YYYY-MM-DD format: forecast_date, target_end_date
    test_dir = 'tests/covid19-data-processed-examples/'

    # ok dates: '2020-04-15-Geneva-DeterministicGrowth.csv'
    with open(test_dir + '2020-04-15-Geneva-DeterministicGrowth.csv') as quantile_fp:
        try:
            _, error_messages = \
                json_io_dict_from_quantile_csv_file(quantile_fp, COVID_TARGETS, covid19_row_validator,
                                                    addl_req_cols=COVID_ADDL_REQ_COLS)
        except Exception as ex:
            self.fail(f"unexpected exception: {ex}")

    # bad date: '2020-04-15-Geneva-DeterministicGrowth_bad_forecast_date.csv'
    with open(test_dir + '2020-04-15-Geneva-DeterministicGrowth_bad_forecast_date.csv') as quantile_fp:
        _, error_messages = \
            json_io_dict_from_quantile_csv_file(quantile_fp, COVID_TARGETS, covid19_row_validator,
                                                addl_req_cols=COVID_ADDL_REQ_COLS)
        self.assertEqual(1, len(error_messages))
        self.assertEqual(MESSAGE_FORECAST_CHECKS, error_messages[0][0])
        self.assertIn("invalid forecast_date or target_end_date format", error_messages[0][1])

    # bad date: '2020-04-15-Geneva-DeterministicGrowth_bad_target_end_date.csv'
    with open(test_dir + '2020-04-15-Geneva-DeterministicGrowth_bad_target_end_date.csv') as quantile_fp:
        _, error_messages = \
            json_io_dict_from_quantile_csv_file(quantile_fp, COVID_TARGETS, covid19_row_validator,
                                                addl_req_cols=COVID_ADDL_REQ_COLS)
        self.assertEqual(1, len(error_messages))
        self.assertEqual(MESSAGE_FORECAST_CHECKS, error_messages[0][0])
        self.assertIn("invalid forecast_date or target_end_date format", error_messages[0][1])
def test_optional_additional_required_column_names(self):
    # target, location, location_name, type, quantile, value:
    with open('tests/quantile-predictions.csv') as quantile_fp:
        _, error_messages = \
            json_io_dict_from_quantile_csv_file(quantile_fp, ['1 wk ahead cum death', '1 day ahead inc hosp'],
                                                addl_req_cols=COVID_ADDL_REQ_COLS)
        self.assertEqual(1, len(error_messages))
        self.assertEqual(MESSAGE_FORECAST_CHECKS, error_messages[0][0])
        self.assertIn('invalid header. did not contain the required column(s)', error_messages[0][1])

    # forecast_date, target, target_end_date, location, location_name, type, quantile, value:
    with open('tests/covid19-data-processed-examples/2020-04-15-Geneva-DeterministicGrowth.csv') as quantile_fp:
        try:
            json_io_dict_from_quantile_csv_file(quantile_fp, ['1 wk ahead cum death', '1 day ahead inc hosp'],
                                                addl_req_cols=COVID_ADDL_REQ_COLS)
        except Exception as ex:
            self.fail(f"unexpected exception: {ex}")
def test_json_io_dict_from_quantile_csv_file_dup_points(self):
    with open('tests/quantiles-duplicate-points.csv') as quantile_fp:
        _, act_error_messages = json_io_dict_from_quantile_csv_file(quantile_fp, ['1 day ahead inc hosp'])
        exp_error_messages = [
            (MESSAGE_QUANTILES_AND_VALUES,
             "Within a Prediction, there cannot be more than 1 Prediction Element of the same "
             "class. Found these duplicate unit/target/classes tuples: [('04', '1 day ahead "
             "inc hosp', ['point', 'point'])]"),
            (MESSAGE_QUANTILES_AS_A_GROUP,
             "There must be zero or one point prediction for each location/target pair. Found "
             "these unit, target, point counts tuples did not have exactly one point: [('04', "
             "'1 day ahead inc hosp', 2)]")]
        self.assertEqual(exp_error_messages, act_error_messages)
def test_json_io_dict_from_quantile_csv_file_ok(self):
    for quantile_file in ['tests/quantile-predictions-5-col.csv', 'tests/quantile-predictions.csv']:
        with open(quantile_file) as quantile_fp, \
                open('tests/quantile-predictions.json') as exp_json_fp:
            exp_json_io_dict = json.load(exp_json_fp)
            act_json_io_dict, _ = json_io_dict_from_quantile_csv_file(
                quantile_fp, ['1 wk ahead cum death', '1 day ahead inc hosp'])
            exp_json_io_dict['predictions'].sort(key=lambda _: (_['unit'], _['target'], _['class']))
            act_json_io_dict['predictions'].sort(key=lambda _: (_['unit'], _['target'], _['class']))
            self.assertEqual(exp_json_io_dict, act_json_io_dict)
def test_error_messages_actual_files_no_errors(self):
    # test large-ish actual files
    ok_quantile_files = [
        # '2020-04-12-IHME-CurveFit.csv',  # errors. tested below
        # '2020-04-15-Geneva-DeterministicGrowth.csv',  # ""
        '2020-04-13-COVIDhub-ensemble.csv',
        '2020-04-12-Imperial-ensemble1.csv',
        '2020-04-13-MOBS_NEU-GLEAM_COVID.csv']
    for quantile_file in ok_quantile_files:
        with open('tests/covid19-data-processed-examples/' + quantile_file) as quantile_fp:
            _, error_messages = json_io_dict_from_quantile_csv_file(
                quantile_fp, COVID_TARGETS, covid19_row_validator, COVID_ADDL_REQ_COLS)
            self.assertEqual(0, len(error_messages))
def upload_forecast(forecast_name):
    path = get_forecast_info(forecast_name)
    db = read_validation_db()
    metadata = metadata_dict_for_file(list(Path(path).parent.glob('metadata-*.txt'))[0])
    if f"{metadata['team_abbr']}-{metadata['model_abbr']}" not in [m.abbreviation for m in models]:
        create_model(path, metadata)
    time_zero_date = '-'.join(forecast_name.split('-')[:3])
    if time_zero_date not in [timezero.timezero_date for timezero in project_obj.timezeros]:
        create_timezero(time_zero_date)
    # print(forecast_name, metadata, time_zero_date)
    if path is not None:
        errors_from_validation = validate_quantile_csv_file(path)
        if errors_from_validation != "no errors":
            print(errors_from_validation)
            return errors_from_validation, True
        with open(path) as fp:
            print('uploading %s' % path)
            checksum = hashlib.md5(str(fp.read()).encode('utf-8')).hexdigest()
            fp.seek(0)
            quantile_json, error_from_transformation = json_io_dict_from_quantile_csv_file(
                fp, COVID_TARGETS, covid19_row_validator, COVID_ADDL_REQ_COLS)
            if len(error_from_transformation) > 0:
                return error_from_transformation, True
            try:
                fr = util.upload_forecast(conn, quantile_json, forecast_name, project_name,
                                          f"{metadata['team_abbr']}-{metadata['model_abbr']}", time_zero_date)
                db[forecast_name] = checksum
                write_db(db)
                return None, fr
            except Exception as e:
                raise e
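# A hypothetical caller sketch (not from the source): it only illustrates how the (errors, result) return
# convention of upload_forecast() above could be consumed. The filename and the helper name are illustrative.
def upload_one_example():
    errors, result = upload_forecast('2020-06-21-USC-SI_kJalpha.csv')
    if errors is not None:
        print(f'upload failed: {errors}')
    else:
        print(f'upload succeeded: {result}')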
def test_covid19_point_with_nonempty_quantile_validation(self):
    # test that a `point` row with a non-empty `quantile` column is flagged
    with open('tests/covid19-data-processed-examples/covid19-predictions-point-nonempty-quantile.csv') as quantile_fp:
        try:
            _, error_messages = \
                json_io_dict_from_quantile_csv_file(quantile_fp, COVID_TARGETS, covid19_row_validator,
                                                    addl_req_cols=COVID_ADDL_REQ_COLS)
            self.assertEqual(1, len(error_messages))
            self.assertEqual(MESSAGE_FORECAST_CHECKS, error_messages[0][0])
            self.assertIn("entries in the `quantile` column must be empty for `point` entries.",
                          error_messages[0][1])
        except Exception as ex:
            self.fail(f"unexpected exception: {ex}")
def validate_quantile_csv_file(csv_fp):
    """
    A simple wrapper of `json_io_dict_from_quantile_csv_file()` that discards the json_io_dict and returns only
    the validation error messages.

    :param csv_fp: as passed to `json_io_dict_from_quantile_csv_file()`
    :return: a list of summarized error message strings, or the string "no errors" if the file validated cleanly
    """
    quantile_csv_file = Path(csv_fp)
    click.echo(f"* validating quantile_csv_file '{quantile_csv_file}'...")
    with open(quantile_csv_file) as cdc_csv_fp:
        # toss json_io_dict:
        _, error_messages = json_io_dict_from_quantile_csv_file(cdc_csv_fp, COVID_TARGETS, covid19_row_validator,
                                                                COVID_ADDL_REQ_COLS)
        if error_messages:
            return summarized_error_messages(error_messages)  # summarizes and orders, converting 2-tuples to strings
        else:
            return "no errors"
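# A minimal usage sketch (an assumption, not part of the library): run validate_quantile_csv_file() on one of
# the known-good test files used above and branch on its string result. The wrapper function name is illustrative.
def validate_one_example():
    result = validate_quantile_csv_file('tests/quantiles-CU-60contact.csv')
    if result == "no errors":
        print("validation passed")
    else:
        print(f"validation failed: {result}")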
def upload_covid_all_forecasts(path_to_processed_model_forecasts, dir_name):
    global models
    global model_names

    # Get all forecasts in the directory of this model
    forecasts = os.listdir(path_to_processed_model_forecasts)

    # Get model name or create a new model if it's not in the current Zoltar project
    metadata = metadata_dict_for_file(path_to_processed_model_forecasts + 'metadata-' + dir_name + '.txt')
    model_name = metadata['model_name']
    if model_name not in model_names:
        model_config = {}
        model_config['name'], model_config['abbreviation'], model_config['team_name'], \
            model_config['description'], model_config['home_url'], model_config['aux_data_url'] = \
            metadata['model_name'], metadata['model_abbr'], metadata['team_name'], metadata['methods'], \
            url + dir_name, 'NA'
        try:
            project_obj.create_model(model_config)
            models = project_obj.models
            model_names = [model.name for model in models]
        except Exception as ex:
            return ex
    model = [model for model in models if model.name == model_name][0]

    # Get names of existing forecasts to avoid re-upload
    existing_forecasts = [forecast.source for forecast in model.forecasts]

    # Batch upload
    json_io_dict_batch = []
    forecast_filename_batch = []
    timezero_date_batch = []
    for forecast in forecasts:
        # Skip if forecast is already on zoltar
        if forecast in existing_forecasts:
            continue
        # Skip metadata text file
        if '.txt' in forecast:
            continue

        with open(path_to_processed_model_forecasts + forecast) as fp:
            # Get timezero and create timezero on zoltar if not existed
            time_zero_date = forecast.split(dir_name)[0][:-1]
            if time_zero_date not in project_timezeros:
                try:
                    project_obj.create_timezero(time_zero_date)
                    project_timezeros.append(time_zero_date)
                except Exception as ex:
                    return ex

            # Validate covid19 file
            errors_from_validation = validate_quantile_csv_file(path_to_processed_model_forecasts + forecast)

            # Upload forecast
            if "no errors" == errors_from_validation:
                quantile_json, error_from_transformation = json_io_dict_from_quantile_csv_file(
                    fp, VALID_TARGET_NAMES, covid19_row_validator)
                if len(error_from_transformation) > 0:
                    return error_from_transformation
                else:
                    # try:
                    #     util.upload_forecast(conn, quantile_json, forecast, project_name, model_name,
                    #                          time_zero_date, overwrite=False)
                    # except Exception as ex:
                    #     print(ex)
                    json_io_dict_batch.append(quantile_json)
                    timezero_date_batch.append(time_zero_date)
                    forecast_filename_batch.append(forecast)
            else:
                return errors_from_validation
            fp.close()

    # Batch upload for better performance
    if len(json_io_dict_batch) > 0:
        try:
            util.upload_forecast_batch(conn, json_io_dict_batch, forecast_filename_batch, project_name,
                                       model_name, timezero_date_batch)
        except Exception as ex:
            return ex
    return "Pass"
def upload_covid_all_forecasts(path_to_processed_model_forecasts):
    # meta info
    project_name = 'COVID-19 Forecasts'
    project_obj = None
    project_timezeros = []
    forecasts = os.listdir(path_to_processed_model_forecasts)
    conn = util.authenticate()

    # Get all existing timezeros in the project
    for project in conn.projects:
        if project.name == project_name:
            project_obj = project
            for timezero in project.timezeros:
                project_timezeros.append(timezero.timezero_date)
            break

    # Get model name
    separator = '-'
    dir_name = separator.join(forecasts[0].split(separator)[3:]).split('.csv')[0]
    metadata = metadata_dict_for_file(path_to_processed_model_forecasts + 'metadata-' + dir_name + '.txt')
    model_name = metadata['model_name']
    model = [model for model in project_obj.models if model.name == model_name][0]

    # Get names of existing forecasts to avoid re-upload
    existing_forecasts = [forecast.source for forecast in model.forecasts]

    for forecast in forecasts:
        # Skip if forecast is already on zoltar
        if forecast in existing_forecasts:
            continue
        # Skip metadata text file
        if '.txt' in forecast:
            continue

        with open(path_to_processed_model_forecasts + forecast) as fp:
            # Get timezero and create timezero on zoltar if not existed
            time_zero_date = forecast.split(dir_name)[0][:-1]
            if time_zero_date not in project_timezeros:
                try:
                    project_obj.create_timezero(time_zero_date)
                except Exception as ex:
                    print(ex)

            # Validate covid19 file
            errors_from_validation = validate_quantile_csv_file(path_to_processed_model_forecasts + forecast)

            # Upload forecast
            if "no errors" == errors_from_validation:
                quantile_json, error_from_transformation = json_io_dict_from_quantile_csv_file(
                    fp, COVID19_TARGET_NAMES, covid19_row_validator)
                if len(error_from_transformation) > 0:
                    print(error_from_transformation)
                else:
                    try:
                        util.upload_forecast(conn, quantile_json, forecast, project_name, model_name,
                                             time_zero_date, overwrite=False)
                    except Exception as ex:
                        print(ex)
            else:
                print(errors_from_validation)
            fp.close()
def upload_covid_all_forecasts(path_to_processed_model_forecasts, dir_name):
    global models
    global model_abbrs

    # Get all forecasts in the directory of this model
    forecasts = os.listdir(path_to_processed_model_forecasts)

    # Get model name or create a new model if it's not in the current Zoltar project
    try:
        metadata = metadata_dict_for_file(path_to_processed_model_forecasts + 'metadata-' + dir_name + '.txt')
    except Exception as ex:
        return ex
    model_abbreviation = metadata['model_abbr']

    # get the corresponding model_config for the metadata file
    model_config = zoltar_config_from_metadata(metadata)
    if model_abbreviation not in model_abbrs:
        pprint.pprint('%s not in models' % model_abbreviation)
        if 'home_url' not in model_config:
            model_config['home_url'] = url + dir_name
        try:
            logger.info(f"Creating model {model_config}")
            models.append(project_obj.create_model(model_config))
            model_abbrs = [model.abbreviation for model in models]
        except Exception as ex:
            return ex

    # fetch model based on model_abbr
    model = [model for model in models if model.abbreviation == model_abbreviation][0]
    if has_changed(metadata, model):
        # model metadata has changed, call the edit function in zoltpy to update metadata
        print(f"{metadata['model_abbr']!r} model has changed metadata contents. Updating on Zoltar...")
        model.edit(model_config)

    # Get timezeros of existing forecasts to avoid re-upload
    existing_time_zeros = [forecast.timezero.timezero_date for forecast in model.forecasts]
    # Convert all timezeros from Date type to str type
    existing_time_zeros = [existing_time_zero.strftime(YYYY_MM_DD_DATE_FORMAT)
                           for existing_time_zero in existing_time_zeros]

    # Batch upload
    json_io_dict_batch = []
    forecast_filename_batch = []
    timezero_date_batch = []
    for forecast in forecasts:
        # Skip metadata text file
        if not forecast.endswith('.csv'):
            continue

        # Default config
        over_write = False
        checksum = 0
        time_zero_date = forecast.split(dir_name)[0][:-1]

        # Check if forecast is already on zoltar
        with open(path_to_processed_model_forecasts + forecast, "rb") as f:
            # Get the current hash of a processed file
            checksum = hashlib.md5(f.read()).hexdigest()
            f.close()

        # Check this hash against the previous version of hash
        if db.get(forecast, None) != checksum:
            print(forecast, db.get(forecast, None))
            if time_zero_date in existing_time_zeros:
                # Check if the already existing forecast has the same issue date
                from datetime import date
                local_issue_date = date.today().strftime("%Y-%m-%d")
                uploaded_forecast = [forecast for forecast in model.forecasts
                                     if forecast.timezero.timezero_date.strftime(YYYY_MM_DD_DATE_FORMAT)
                                     == time_zero_date][0]
                uploaded_issue_date = uploaded_forecast.issue_date
                if local_issue_date == uploaded_issue_date:
                    # Overwrite the existing forecast if it has the same issue date
                    over_write = True
                    logger.info(f"Overwrite existing forecast={forecast} with newer version because the new "
                                f"issue_date={local_issue_date} is the same as the uploaded file "
                                f"issue_date={uploaded_issue_date}")
                else:
                    logger.info(f"Add newer version to forecast={forecast} because the new "
                                f"issue_date={local_issue_date} is different from uploaded file "
                                f"issue_date={uploaded_issue_date}")
        else:
            continue

        with open(path_to_processed_model_forecasts + forecast) as fp:
            # Create timezero on zoltar if not existed
            if time_zero_date not in project_timezeros:
                try:
                    project_obj.create_timezero(time_zero_date)
                    project_timezeros.append(time_zero_date)
                except Exception as ex:
                    print(ex)
                    return ex

            # Validate covid19 file
            print(f"Validating {forecast}")
            errors_from_validation = validate_quantile_csv_file(path_to_processed_model_forecasts + forecast)

            # Upload forecast
            if "no errors" == errors_from_validation:
                quantile_json, error_from_transformation = json_io_dict_from_quantile_csv_file(
                    fp, COVID_TARGETS, covid19_row_validator, COVID_ADDL_REQ_COLS)
                if len(error_from_transformation) > 0:
                    return error_from_transformation
                else:
                    try:
                        logger.debug('Upload forecast for model: %s \t|\t File: %s\n'
                                     % (metadata['model_abbr'], forecast))
                        upload_covid_forecast_by_model(conn, quantile_json, forecast, project_name, model,
                                                       metadata['model_abbr'], time_zero_date,
                                                       overwrite=over_write)
                        db[forecast] = checksum
                    except Exception as ex:
                        logger.error(ex)
                        return ex
                    json_io_dict_batch.append(quantile_json)
                    timezero_date_batch.append(time_zero_date)
                    forecast_filename_batch.append(forecast)
            else:
                return errors_from_validation
            fp.close()

    # # Batch upload for better performance
    # if len(json_io_dict_batch) > 0:
    #     try:
    #         util.upload_forecast_batch(conn, json_io_dict_batch, forecast_filename_batch, project_name,
    #                                    model_name, timezero_date_batch, overwrite=over_write)
    #     except Exception as ex:
    #         return ex
    return "Pass"
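# A small, self-contained illustration (an assumption, not from the source) of the checksum idea used in the
# upload functions above: hash a file's bytes with MD5 and compare against a previously stored value to decide
# whether the file changed and needs re-upload. The helper name is hypothetical.
import hashlib

def file_changed(path, previous_checksum):
    # Read the raw bytes and compare the MD5 hex digest to the stored checksum
    with open(path, "rb") as f:
        return hashlib.md5(f.read()).hexdigest() != previous_checksum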
def upload_covid_all_forecasts(path_to_processed_model_forecasts, dir_name):
    global models
    global model_names

    # Get all forecasts in the directory of this model
    forecasts = os.listdir(path_to_processed_model_forecasts)

    # Get model name or create a new model if it's not in the current Zoltar project
    try:
        metadata = metadata_dict_for_file(path_to_processed_model_forecasts + 'metadata-' + dir_name + '.txt')
    except Exception as ex:
        return ex
    model_name = metadata['model_name']
    if model_name not in model_names:
        model_config = {}
        model_config['name'], model_config['abbreviation'], model_config['team_name'], \
            model_config['description'], model_config['home_url'], model_config['aux_data_url'] = \
            metadata['model_name'], metadata['team_abbr'] + '-' + metadata['model_abbr'], \
            metadata['team_name'], metadata['methods'], url + dir_name, 'NA'
        try:
            project_obj.create_model(model_config)
            models = project_obj.models
            model_names = [model.name for model in models]
        except Exception as ex:
            return ex
    model = [model for model in models if model.name == model_name][0]

    # Get names of existing forecasts to avoid re-upload
    existing_forecasts = [forecast.source for forecast in model.forecasts]

    # Batch upload
    json_io_dict_batch = []
    forecast_filename_batch = []
    timezero_date_batch = []
    for forecast in forecasts:
        over_write = False
        checksum = 0

        # Check if forecast is already on zoltar
        with open(path_to_processed_model_forecasts + forecast, "rb") as f:
            # Get the current hash of a processed file
            checksum = hashlib.md5(f.read()).hexdigest()
            f.close()

        # Check this hash against the previous version of hash
        if db.get(forecast, None) != checksum:
            print(forecast)
            if forecast in existing_forecasts:
                over_write = True
        else:
            continue

        # Skip metadata text file
        if '.txt' in forecast:
            continue

        with open(path_to_processed_model_forecasts + forecast) as fp:
            # Get timezero and create timezero on zoltar if not existed
            time_zero_date = forecast.split(dir_name)[0][:-1]
            # if time_zero_date != "2020-05-25":
            #     continue
            if time_zero_date not in project_timezeros:
                try:
                    project_obj.create_timezero(time_zero_date)
                    project_timezeros.append(time_zero_date)
                except Exception as ex:
                    return ex

            # Validate covid19 file
            errors_from_validation = validate_quantile_csv_file(path_to_processed_model_forecasts + forecast)

            # Upload forecast
            if "no errors" == errors_from_validation:
                quantile_json, error_from_transformation = json_io_dict_from_quantile_csv_file(
                    fp, VALID_TARGET_NAMES, covid19_row_validator)
                if len(error_from_transformation) > 0:
                    return error_from_transformation
                else:
                    try:
                        util.upload_forecast(conn, quantile_json, forecast, project_name, model_name,
                                             time_zero_date, overwrite=over_write)
                        db[forecast] = checksum
                    except Exception as ex:
                        print(ex)
                        return ex
                    json_io_dict_batch.append(quantile_json)
                    timezero_date_batch.append(time_zero_date)
                    forecast_filename_batch.append(forecast)
            else:
                return errors_from_validation
            fp.close()

    # # Batch upload for better performance
    # if len(json_io_dict_batch) > 0:
    #     try:
    #         util.upload_forecast_batch(conn, json_io_dict_batch, forecast_filename_batch, project_name,
    #                                    model_name, timezero_date_batch, overwrite=over_write)
    #     except Exception as ex:
    #         return ex
    return "Pass"
def upload_covid_all_forecasts(path_to_processed_model_forecasts, dir_name):
    global models
    global model_names

    # Get all forecasts in the directory of this model
    forecasts = os.listdir(path_to_processed_model_forecasts)
    conn.re_authenticate_if_necessary()

    # Get model name or create a new model if it's not in the current Zoltar project
    try:
        metadata = metadata_dict_for_file(path_to_processed_model_forecasts + 'metadata-' + dir_name + '.txt')
    except Exception as ex:
        return ex
    model_name = metadata['model_name']
    if model_name not in model_names:
        model_config = {}
        model_config['name'], model_config['abbreviation'], model_config['team_name'], \
            model_config['description'], model_config['home_url'], model_config['aux_data_url'] = \
            metadata['model_name'], metadata['team_abbr'] + '-' + metadata['model_abbr'], \
            metadata['team_name'], metadata['methods'], \
            metadata['website_url'] if metadata.get('website_url') is not None else url + dir_name, 'NA'
        try:
            print('Create model %s' % model_name)
            project_obj.create_model(model_config)
            models = project_obj.models
            model_names = [model.name for model in models]
        except Exception as ex:
            return ex
    print('Time: %s \t Model: %s' % (datetime.now(), model_name))
    model = [model for model in models if model.name == model_name][0]

    # Get timezeros of existing forecasts to avoid re-upload
    existing_time_zeros = [forecast.timezero.timezero_date for forecast in model.forecasts]

    # Batch upload
    json_io_dict_batch = []
    forecast_filename_batch = []
    timezero_date_batch = []
    for forecast in forecasts:
        # Default config
        over_write = False
        checksum = 0
        time_zero_date = forecast.split(dir_name)[0][:-1]

        # Check if forecast is already on zoltar
        with open(path_to_processed_model_forecasts + forecast, "rb") as f:
            # Get the current hash of a processed file
            checksum = hashlib.md5(f.read()).hexdigest()
            f.close()

        # Check this hash against the previous version of hash
        # if db.get(forecast, None) != checksum:
        #     print(forecast, db.get(forecast, None))
        #     if time_zero_date in existing_time_zeros:
        #         over_write = True
        #     else:
        #         continue

        # if timezero existing, then don't write again
        if time_zero_date in existing_time_zeros:
            # update checksum
            # db[forecast] = checksum
            continue

        # Skip metadata text file
        if '.txt' in forecast:
            continue

        with open(path_to_processed_model_forecasts + forecast) as fp:
            # Create timezero on zoltar if not existed
            if time_zero_date not in project_timezeros:
                try:
                    project_obj.create_timezero(time_zero_date)
                    project_timezeros.append(time_zero_date)
                except Exception as ex:
                    return ex

            # Validate covid19 file
            errors_from_validation = validate_quantile_csv_file(path_to_processed_model_forecasts + forecast)

            # Upload forecast
            if "no errors" == errors_from_validation:
                quantile_json, error_from_transformation = json_io_dict_from_quantile_csv_file(
                    fp, COVID_TARGETS, covid19_row_validator, COVID_ADDL_REQ_COLS)
                if len(error_from_transformation) > 0:
                    return error_from_transformation
                else:
                    try:
                        print('Upload forecast for model: %s \t|\t File: %s' % (model_name, forecast))
                        print()
                        util.upload_forecast(conn, quantile_json, forecast, project_name, model_name,
                                             time_zero_date, overwrite=over_write)
                        db[forecast] = checksum
                    except Exception as ex:
                        print(ex)
                        return ex
                    json_io_dict_batch.append(quantile_json)
                    timezero_date_batch.append(time_zero_date)
                    forecast_filename_batch.append(forecast)
            else:
                return errors_from_validation
            fp.close()

    # # Batch upload for better performance
    # if len(json_io_dict_batch) > 0:
    #     try:
    #         util.upload_forecast_batch(conn, json_io_dict_batch, forecast_filename_batch, project_name,
    #                                    model_name, timezero_date_batch, overwrite=over_write)
    #     except Exception as ex:
    #         return ex
    return "Pass"
def zoltar_connection_app():
    """
    Application demonstrating use of the library at the ZoltarConnection level (rather than using the package's
    higher-level functions such as delete_forecast(), etc.)

    - App args: None
    - Required environment variables:
      - 'Z_HOST': Zoltar host to connect to. typically "https://www.zoltardata.com"
      - 'Z_USERNAME': username of the account that has permission to access the resources in above app args
      - 'Z_PASSWORD': password ""
    """
    host = os.environ.get('Z_HOST')
    username = os.environ.get('Z_USERNAME')
    password = os.environ.get('Z_PASSWORD')

    #
    # try out non-destructive functions
    #

    # work with a connection
    conn = ZoltarConnection(host)
    conn.authenticate(username, password)
    print('\n* projects')
    for project in conn.projects:
        print(f'- {project}, {project.id}, {project.name}')

    # work with a project
    project = [project for project in conn.projects if project.name == 'Docs Example Project'][0]
    print(f'\n* working with {project}')
    print(f"- objects in {project}:\n"
          f"  = units: {project.units}\n"
          f"  = targets: {project.targets}\n"
          f"  = timezeros: {project.timezeros}\n"
          f"  = models: {project.models}")

    # get the project's truth detail
    print(f'\n* truth for {project}')
    print(f'- source, created_at: {project.truth_source}, {project.truth_created_at}')

    # get the project's latest forecasts
    print(f'\n* latest forecasts for {project}')
    print(f'- source, created_at: {project.latest_forecasts}')

    # work with a model
    model = [model for model in project.models if model.name == 'docs forecast model'][0]
    print(f'\n* working with {model}')
    print(f'- forecasts: {model.forecasts}')

    # work with a forecast
    forecast = model.forecasts[0]
    print(f'\n* working with {forecast}')
    forecast_data = forecast.data()
    print(f"- data: {len(forecast_data['predictions'])} predictions")  # 26 predictions

    # work with a cdc csv file
    cdc_csv_file = "tests/EW01-2011-ReichLab_kde_US_National.csv"
    print(f'\n* working with a cdc csv file: {cdc_csv_file}')
    with open(cdc_csv_file) as fp:
        json_io_dict = json_io_dict_from_cdc_csv_file(2011, fp)
    print(f"- converted cdc data to json: {len(json_io_dict['predictions'])} predictions")  # 154 predictions

    # work with a quantile csv file
    quantile_csv_file = "tests/quantile-predictions.csv"
    print(f'\n* working with a quantile csv file: {quantile_csv_file}')
    with open(quantile_csv_file) as fp:
        json_io_dict, error_messages = \
            json_io_dict_from_quantile_csv_file(fp, ['1 wk ahead cum death', '1 day ahead inc hosp'])
    print(f"- converted quantile data to json: {len(json_io_dict['predictions'])} predictions")  # 5 predictions

    # convert to a Pandas DataFrame
    print(f'\n* working with a pandas data frame')
    dataframe = dataframe_from_json_io_dict(forecast_data)
    print(f'- dataframe: {dataframe}')

    # query forecast data
    print(f"\n* querying forecast data")
    query = {'targets': ['pct next week', 'cases next week'], 'types': ['point']}
    job = project.submit_query(QueryType.FORECASTS, query)
    busy_poll_job(job)  # does refresh()
    rows = job.download_data()
    print(f"- got {len(rows)} forecast rows. as a dataframe:")
    print(dataframe_from_rows(rows))

    # query truth data
    print(f"\n* querying truth data")
    query = {'targets': ['pct next week', 'cases next week']}
    job = project.submit_query(QueryType.TRUTH, query)
    busy_poll_job(job)  # does refresh()
    rows = job.download_data()
    print(f"- got {len(rows)} truth rows. as a dataframe:")
    print(dataframe_from_rows(rows))

    #
    # try out destructive functions
    #

    # create a sandbox project to play with, deleting the existing one if any: docs-project.json
    project = [project for project in conn.projects if project.name == 'My project']
    project = project[0] if project else None
    if project:
        print(f"\n* deleting project {project}")
        project.delete()
        print("- deleted project")

    print(f"\n* creating project")
    project = create_project(conn, "examples/docs-project.json")  # "name": "My project"
    print(f"- created project: {project}")

    # upload truth
    print(f"\n* uploading truth")
    with open('tests/docs-ground-truth.csv') as csv_fp:
        job = project.upload_truth_data(csv_fp)
    busy_poll_job(job)
    print(f"- upload truth done")

    # create a model, upload a forecast, query the project, then delete it
    print(f"\n* creating model")
    with open("examples/example-model-config.json") as fp:
        model = project.create_model(json.load(fp))
    print(f"- created model: {model}")

    print(f"\n* uploading forecast. pre-upload forecasts: {model.forecasts}")
    with open("examples/docs-predictions.json") as fp:
        json_io_dict = json.load(fp)
        job = model.upload_forecast(json_io_dict, "docs-predictions.json", "2011-10-02", "some predictions")
    busy_poll_job(job)
    new_forecast = job.created_forecast()
    print(f"- uploaded forecast: {new_forecast}")

    model.refresh()
    print(f'\n* post-upload forecasts: {model.forecasts}')

    print(f"\n* deleting forecast: {new_forecast}")
    job = new_forecast.delete()
    busy_poll_job(job)
    print(f"- deleting forecast: done")

    # clean up by deleting the sandbox project. NB: This will delete all of the data associated with the project
    # without warning, including models and forecasts
    print(f"\n* deleting project {project}")
    project.delete()
    print("- deleted project")

    print("\n* app done!")
def test_json_io_dict_from_quantile_csv_file_no_points(self):
    with open('tests/quantile-predictions-no-point.csv') as quantile_fp:
        _, error_messages = json_io_dict_from_quantile_csv_file(
            quantile_fp, ['1 day ahead inc hosp', '1 wk ahead cum death'])
        self.assertEqual(0, len(error_messages))