def get_predictions(start_date_str, end_date_str, pres_df, countries=None):
    """
    Run the external prediction script on prescriptions preceded by history.

    Historical intervention plans dated strictly before ``start_date_str`` are
    stacked on top of ``pres_df`` and written to a temporary CSV file. That
    file is handed to ``PREDICT_MODULE``, executed with ``python3`` in a
    subprocess, which writes its predictions to a second temporary file.

    :param start_date_str: first day to predict, formatted as ``YYYY-MM-DD``
    :param end_date_str: last day to predict, passed through to the script
    :param pres_df: DataFrame of prescriptions appended after the history
    :param countries: optional selection forwarded to ``generate_scenario``
    :return: DataFrame read from the CSV produced by the prediction script
    :raises subprocess.CalledProcessError: if the script exits with an error
    """
    # Build the history and keep only what precedes the prediction window
    historical_raw = get_raw_data(HIST_DATA_FILE_PATH)
    past_ips = generate_scenario(start_date_str,
                                 end_date_str,
                                 historical_raw,
                                 countries=countries,
                                 scenario='Historical')
    cutoff = pd.to_datetime(start_date_str, format='%Y-%m-%d')
    past_ips = past_ips[past_ips.Date < cutoff]
    full_plan = pd.concat([past_ips, pres_df])

    with tempfile.NamedTemporaryFile() as ips_tmp:
        # The script reads the intervention plan from disk, by file name
        full_plan.to_csv(ips_tmp.name)
        with tempfile.NamedTemporaryFile() as pred_tmp:
            command = [
                'python3', PREDICT_MODULE,
                '--start_date', start_date_str,
                '--end_date', end_date_str,
                '--interventions_plan', ips_tmp.name,
                '--output_file', pred_tmp.name,
            ]
            # stderr is merged into stdout so everything the script says is echoed
            script_output = subprocess.check_output(command,
                                                    stderr=subprocess.STDOUT)
            print(script_output.decode("utf-8"))

            # Must be read before the temporary file is closed and removed
            return pd.read_csv(pred_tmp)
def test_generate_scenario_mind_the_gap_freeze_dates_mismatch(self):
    """
    A "Freeze" scenario must not skip a day when countries stop on different dates.

    The fixture holds 2 countries where one has 1 more day of data than the
    other. Last known date:
      - Belgium: 20201103
      - Brazil: 20201104
    Every country / region is expected to have one row per day from inception
    to the end date.
    """
    countries = ["Belgium", "Brazil"]
    source_df = get_raw_data(DATES_MISMATCH_DATA_FILE, latest=False)
    result_df = generate_scenario("2021-01-01",
                                  "2021-01-31",
                                  source_df,
                                  countries,
                                  scenario="Freeze")
    self.assertIsNotNone(result_df)

    # assertCountEqual compares the elements themselves, ignoring their order
    self.assertCountEqual(countries,
                          result_df.CountryName.unique(),
                          "Not the requested countries")

    # Inception is 2020-01-01: 366 days for 2020 plus 31 for January 2021
    expected_days = 397

    for country in countries:
        # Regions are taken from the input data, not from the scenario
        country_rows = source_df[source_df.CountryName == country]
        for region in country_rows.RegionName.unique():
            in_geo = (result_df.CountryName == country) & (result_df.RegionName == region)
            geo_rows = result_df[in_geo]
            self.assertEqual(expected_days,
                             len(geo_rows),
                             f"Not the expected number of days"
                             f" for {country} / {region}")
def get_predictions(start_date_str, end_date_str, pres_df, countries=None):
    """
    Predict in-process with ``XPrizePredictor`` on prescriptions preceded by history.

    Historical intervention plans dated strictly before ``start_date_str`` are
    stacked on top of ``pres_df`` and written to a temporary CSV file, whose
    name is then given to ``XPrizePredictor.predict``.

    :param start_date_str: first day to predict, formatted as ``YYYY-MM-DD``
    :param end_date_str: last day to predict, passed through to the predictor
    :param pres_df: DataFrame of prescriptions appended after the history
    :param countries: optional selection forwarded to ``generate_scenario``
    :return: whatever ``XPrizePredictor.predict`` returns
    """
    # Build the history and keep only what precedes the prediction window
    historical_raw = get_raw_data(HIST_DATA_FILE_PATH)
    past_ips = generate_scenario(start_date_str,
                                 end_date_str,
                                 historical_raw,
                                 countries=countries,
                                 scenario='Historical')
    cutoff = pd.to_datetime(start_date_str, format='%Y-%m-%d')
    full_plan = pd.concat([past_ips[past_ips.Date < cutoff], pres_df])

    with tempfile.NamedTemporaryFile() as ips_tmp:
        predictor = XPrizePredictor()
        # The predictor takes the intervention plan as a file path
        full_plan.to_csv(ips_tmp.name)
        return predictor.predict(start_date_str, end_date_str, ips_tmp.name)
def get_predictions(start_date_str, end_date_str, pres_df, countries=None):
    """
    Run the external prediction script on prescriptions preceded by history.

    Historical intervention plans dated strictly before ``start_date_str`` are
    concatenated with ``pres_df`` and written to ``TMP_PRESCRIPTION_FILE``.
    The process then changes directory to ``../../../..`` to run
    ``PREDICT_MODULE`` in a ``python`` subprocess, reads the predictions the
    script wrote to ``TMP_PRED_FILE_NAME``, and finally returns to the
    directory it started from.

    The original working directory is restored even when the script or the
    CSV read fails, so an error here does not leave the caller in another
    directory.

    :param start_date_str: first day to predict, formatted as ``YYYY-MM-DD``
    :param end_date_str: last day to predict, passed through to the script
    :param pres_df: DataFrame of prescriptions appended after the history
    :param countries: optional selection forwarded to ``generate_scenario``
    :return: DataFrame read from ``TMP_PRED_FILE_NAME``
    :raises subprocess.CalledProcessError: if the script exits with an error
    """
    # Concatenate prescriptions with historical data
    raw_df = get_raw_data(HIST_DATA_FILE_PATH)
    hist_df = generate_scenario(start_date_str, end_date_str, raw_df,
                                countries=countries,
                                scenario='Historical')
    start_date = pd.to_datetime(start_date_str, format='%Y-%m-%d')
    hist_df = hist_df[hist_df.Date < start_date]
    ips_df = pd.concat([hist_df, pres_df])

    # Write ips_df to file
    ips_df.to_csv(TMP_PRESCRIPTION_FILE)

    # Resolve the path now: a relative path would break after the chdir below
    ip_file_full_path = os.path.abspath(TMP_PRESCRIPTION_FILE)

    # Go to the root dir to access the predict script
    wd = os.getcwd()
    os.chdir("../../../..")
    try:
        # Run script to generate predictions
        output_str = subprocess.check_output(
            [
                'python', PREDICT_MODULE,
                '--start_date', start_date_str,
                '--end_date', end_date_str,
                '--interventions_plan', ip_file_full_path,
                '--output_file', TMP_PRED_FILE_NAME
            ],
            stderr=subprocess.STDOUT
        )

        # Print output from running script
        print(output_str.decode("utf-8"))

        # Load predictions to return (path is resolved from the root dir)
        df = pd.read_csv(TMP_PRED_FILE_NAME)
    finally:
        # Always return to the starting dir, including on failure
        os.chdir(wd)

    return df
def generate_costs(distribution='ones'):
    """
    Returns df of costs for each IP for each geo according to distribution.

    Costs always sum to #IPS (i.e., len(IP_COLUMNS)).

    Available distributions:
        - 'ones': cost is 1 for each IP.
        - 'uniform': costs are sampled uniformly across IPs independently
                     for each geo.

    :param distribution: name of the cost distribution, 'ones' or 'uniform'
    :return: DataFrame with one row per (CountryName, RegionName) found in
             DATA_FILE and one cost column per entry of IP_COLUMNS
    :raises AssertionError: if distribution is not supported
    """
    assert distribution in ['ones', 'uniform'], \
        f'Unsupported distribution {distribution}'

    df = get_raw_data(DATA_FILE, latest=False)

    # Reduce df to one row per geo, keeping only the geo id info.
    # Only the group keys are needed, so count rows instead of averaging every
    # column: mean() fails on non-numeric columns with recent pandas versions.
    geo_columns = ['CountryName', 'RegionName']
    df = df.groupby(geo_columns).size().reset_index()[geo_columns].copy()

    if distribution == 'ones':
        df[IP_COLUMNS] = 1

    elif distribution == 'uniform':
        # Generate weights uniformly for each geo independently.
        nb_geos = len(df)
        nb_ips = len(IP_COLUMNS)
        samples = np.random.uniform(size=(nb_ips, nb_geos))
        # Normalize each geo (column) so that its costs sum to nb_ips
        weights = nb_ips * samples / samples.sum(axis=0)
        df[IP_COLUMNS] = weights.T

        # Round weights for better readability with negligible loss of generality.
        df = df.round(2)

    return df
def setUpClass(cls):
    """Read the csv data a single time and share it through ``cls.latest_df``."""
    shared_df = get_raw_data(DATA_FILE, latest=True)
    cls.latest_df = shared_df
# In[29]: from datetime import datetime, timedelta start_date = datetime.now() + timedelta(days=7) start_date_str = start_date.strftime('%Y-%m-%d') end_date = start_date + timedelta(days=180) end_date_str = end_date.strftime('%Y-%m-%d') print(f"Start date: {start_date_str}") print(f"End date: {end_date_str}") # In[30]: from covid_xprize.validation.scenario_generator import get_raw_data, generate_scenario, NPI_COLUMNS DATA_FILE = 'data/OxCGRT_latest.csv' latest_df = get_raw_data(DATA_FILE, latest=True) scenario_df = generate_scenario(start_date_str, end_date_str, latest_df, countries=None, scenario="Freeze") scenario_file = "predictions/180_days_future_scenario.csv" scenario_df.to_csv(scenario_file, index=False) print(f"Saved scenario to {scenario_file}") # ### Check it # In[ ]: get_ipython().run_cell_magic( 'time', '',
# Generate a 180-day "Freeze" scenario for all countries and save it as CSV.
from datetime import datetime, timedelta

import pandas as pd

from covid_xprize.validation.scenario_generator import (
    NPI_COLUMNS,
    generate_scenario,
    get_raw_data,
)

# The scenario window opens on a fixed date and spans the 180 days after it
start_date_str = '2020-10-01'
start_date = pd.to_datetime(start_date_str, format='%Y-%m-%d')
end_date = start_date + timedelta(days=180)
end_date_str = end_date.strftime('%Y-%m-%d')
print(f"Start date: {start_date_str}")
print(f"End date: {end_date_str}")

# Load the raw data, then build the scenario; countries=None applies no selection
DATA_FILE = '../data/OxCGRT_latest.csv'
latest_df = get_raw_data(DATA_FILE, latest=False)
scenario_df = generate_scenario(
    start_date_str,
    end_date_str,
    latest_df,
    countries=None,
    scenario="Freeze",
)

# Persist the scenario without the DataFrame index
scenario_file = "180_days_future_scenario.csv"
scenario_df.to_csv(scenario_file, index=False)
print(f"Saved scenario to {scenario_file}")