Exemplo n.º 1
0
def get_predictions(start_date_str, end_date_str, pres_df, countries=None):
    """
    Returns the predictions produced by the PREDICT_MODULE script.

    Historical intervention plans dated strictly before start_date_str are
    stacked on top of pres_df, written to a temporary CSV file and handed
    to the script, which runs in a subprocess and writes its predictions to
    a second temporary file.

    Arguments:
        - start_date_str: 'YYYY-MM-DD' string; cutoff for the historical
                          rows and value of the script's --start_date.
        - end_date_str: value of the script's --end_date.
        - pres_df: DataFrame of prescriptions appended after the history.
        - countries: forwarded unchanged to generate_scenario.

    Raises subprocess.CalledProcessError if the script exits with a
    non-zero status.
    """
    # Historical scenario, restricted to the days before the start date
    historical_df = generate_scenario(start_date_str,
                                      end_date_str,
                                      get_raw_data(HIST_DATA_FILE_PATH),
                                      countries=countries,
                                      scenario='Historical')
    cutoff = pd.to_datetime(start_date_str, format='%Y-%m-%d')
    is_before_cutoff = historical_df.Date < cutoff
    combined_ips_df = pd.concat([historical_df[is_before_cutoff], pres_df])

    with tempfile.NamedTemporaryFile() as ips_tmp:
        # The script reads the intervention plan from disk
        combined_ips_df.to_csv(ips_tmp.name)

        with tempfile.NamedTemporaryFile() as pred_tmp:
            command = [
                'python3', PREDICT_MODULE,
                '--start_date', start_date_str,
                '--end_date', end_date_str,
                '--interventions_plan', ips_tmp.name,
                '--output_file', pred_tmp.name,
            ]
            # stderr is folded into stdout so everything gets echoed below
            script_output = subprocess.check_output(command,
                                                    stderr=subprocess.STDOUT)
            print(script_output.decode("utf-8"))

            # Read the predictions back before the temp file is removed
            predictions_df = pd.read_csv(pred_tmp)

    return predictions_df
Exemplo n.º 2
0
 def test_generate_scenario_mind_the_gap_freeze_dates_mismatch(self):
     """
     Freeze scenario must contain every day for 2 countries whose data
     end on different dates (one has 1 more day of data than the other).

     Last known date in the fixture:
         - Belgium: 20201103
         - Brazil:  20201104
     No day may be skipped for either country.
     """
     requested_countries = ["Belgium", "Brazil"]
     raw_df = get_raw_data(DATES_MISMATCH_DATA_FILE, latest=False)
     frozen_df = generate_scenario("2021-01-01",
                                   "2021-01-31",
                                   raw_df,
                                   requested_countries,
                                   scenario="Freeze")
     self.assertIsNotNone(frozen_df)
     # Despite its name, assertCountEqual compares elements regardless of order
     self.assertCountEqual(requested_countries,
                           frozen_df.CountryName.unique(),
                           "Not the requested countries")
     # Inception is 2020-01-01: all of 2020 (leap year) plus January 2021
     expected_nb_days = 366 + 31
     # Every region of every requested country must span the full period
     for country in requested_countries:
         in_country = raw_df.CountryName == country
         for region in raw_df[in_country].RegionName.unique():
             is_geo = ((frozen_df.CountryName == country)
                       & (frozen_df.RegionName == region))
             geo_rows = frozen_df[is_geo]
             self.assertEqual(
                 expected_nb_days, len(geo_rows),
                 f"Not the expected number of days"
                 f" for {country} / {region}")
Exemplo n.º 3
0
def get_predictions(start_date_str, end_date_str, pres_df, countries=None):
    """
    Returns what XPrizePredictor.predict produces for the given period.

    Historical intervention plans dated strictly before start_date_str are
    stacked on top of pres_df and written to a temporary CSV file, whose
    path is passed to the predictor.

    Arguments:
        - start_date_str: 'YYYY-MM-DD' string; cutoff for the historical
                          rows and first argument to predict.
        - end_date_str: second argument to predict.
        - pres_df: DataFrame of prescriptions appended after the history.
        - countries: forwarded unchanged to generate_scenario.
    """
    # Historical scenario, restricted to the days before the start date
    historical_df = generate_scenario(start_date_str,
                                      end_date_str,
                                      get_raw_data(HIST_DATA_FILE_PATH),
                                      countries=countries,
                                      scenario='Historical')
    cutoff = pd.to_datetime(start_date_str, format='%Y-%m-%d')
    is_before_cutoff = historical_df.Date < cutoff
    combined_ips_df = pd.concat([historical_df[is_before_cutoff], pres_df])

    # The predictor takes a file path, so go through a temporary CSV
    with tempfile.NamedTemporaryFile() as ips_tmp:
        predictor = XPrizePredictor()
        combined_ips_df.to_csv(ips_tmp.name)
        return predictor.predict(start_date_str, end_date_str, ips_tmp.name)
Exemplo n.º 4
0
def get_predictions(start_date_str, end_date_str, pres_df, countries=None):
    """
    Returns the predictions produced by the PREDICT_MODULE script.

    Historical intervention plans dated strictly before start_date_str are
    stacked on top of pres_df and written to TMP_PRESCRIPTION_FILE. The
    script is then run from four directories up (the covid-xprize root dir)
    and its output is read back from TMP_PRED_FILE_NAME.

    The working directory is changed while the script runs and is always
    restored before returning, including when the script or the CSV load
    fails.

    Arguments:
        - start_date_str: 'YYYY-MM-DD' string; cutoff for the historical
                          rows and value of the script's --start_date.
        - end_date_str: value of the script's --end_date.
        - pres_df: DataFrame of prescriptions appended after the history.
        - countries: forwarded unchanged to generate_scenario.

    Raises subprocess.CalledProcessError if the script exits with a
    non-zero status.
    """

    # Concatenate prescriptions with historical data
    raw_df = get_raw_data(HIST_DATA_FILE_PATH)
    hist_df = generate_scenario(start_date_str, end_date_str, raw_df,
                                countries=countries, scenario='Historical')
    start_date = pd.to_datetime(start_date_str, format='%Y-%m-%d')
    hist_df = hist_df[hist_df.Date < start_date]
    ips_df = pd.concat([hist_df, pres_df])

    # Write ips_df to file
    ips_df.to_csv(TMP_PRESCRIPTION_FILE)

    # Use full path of the local file passed as ip_file, because the
    # working directory is about to change
    ip_file_full_path = os.path.abspath(TMP_PRESCRIPTION_FILE)

    # Go to covid-xprize root dir to access predict script
    wd = os.getcwd()
    os.chdir("../../../..")
    try:
        # Run script to generate predictions
        output_str = subprocess.check_output(
            [
                'python', PREDICT_MODULE,
                '--start_date', start_date_str,
                '--end_date', end_date_str,
                '--interventions_plan', ip_file_full_path,
                '--output_file', TMP_PRED_FILE_NAME
            ],
            stderr=subprocess.STDOUT
        )

        # Print output from running script
        print(output_str.decode("utf-8"))

        # Load predictions to return
        df = pd.read_csv(TMP_PRED_FILE_NAME)
    finally:
        # Return to prescriptor dir even if the script or the load failed,
        # so a failure does not leave the process in the wrong directory
        os.chdir(wd)

    return df
def generate_costs(distribution='ones'):
    """
    Returns df of costs for each IP for each geo according to distribution.

    The result has one row per (CountryName, RegionName) pair found in
    DATA_FILE, sorted by those keys, with one cost column per entry of
    IP_COLUMNS.

    Costs always sum to #IPS (i.e., len(IP_COLUMNS)), up to the rounding
    applied to the 'uniform' weights.

    Available distributions:
        - 'ones': cost is 1 for each IP.
        - 'uniform': costs are sampled uniformly across IPs independently
                     for each geo.
    """
    assert distribution in ['ones', 'uniform'], \
           f'Unsupported distribution {distribution}'

    df = get_raw_data(DATA_FILE, latest=False)

    # Reduce df to one row per geo, keeping only the geo id info.
    # Only the group keys are needed, so they are taken from the index of
    # the grouped sizes rather than aggregating every column with mean(),
    # which fails on non-numeric columns with recent pandas versions.
    geo_id_columns = ['CountryName', 'RegionName']
    df = df.groupby(geo_id_columns).size().index.to_frame(index=False)

    if distribution == 'ones':
        df[IP_COLUMNS] = 1

    elif distribution == 'uniform':

        # Generate weights uniformly for each geo independently.
        nb_geos = len(df)
        nb_ips = len(IP_COLUMNS)
        samples = np.random.uniform(size=(nb_ips, nb_geos))
        # Normalize each geo's column of samples so its weights sum to nb_ips
        weights = nb_ips * samples / samples.sum(axis=0)
        df[IP_COLUMNS] = weights.T

        # Round weights for better readability with negligible loss of generality.
        df = df.round(2)

    return df
Exemplo n.º 6
0
 def setUpClass(cls):
     """Read the raw CSV data a single time and keep it on the class."""
     shared_raw_df = get_raw_data(DATA_FILE, latest=True)
     cls.latest_df = shared_raw_df
Exemplo n.º 7
0
# In[29]:

from datetime import datetime, timedelta

# Scenario window: opens one week from now and spans 180 days
start_date = datetime.now() + timedelta(days=7)
end_date = start_date + timedelta(days=180)
start_date_str = f"{start_date:%Y-%m-%d}"
end_date_str = f"{end_date:%Y-%m-%d}"
print(f"Start date: {start_date_str}")
print(f"End date: {end_date_str}")

# In[30]:

from covid_xprize.validation.scenario_generator import get_raw_data, generate_scenario, NPI_COLUMNS
DATA_FILE = 'data/OxCGRT_latest.csv'
scenario_file = "predictions/180_days_future_scenario.csv"

# Build a "Freeze" scenario for all countries over the window and save it
latest_df = get_raw_data(DATA_FILE, latest=True)
scenario_df = generate_scenario(
    start_date_str,
    end_date_str,
    latest_df,
    countries=None,
    scenario="Freeze",
)
scenario_df.to_csv(scenario_file, index=False)
print(f"Saved scenario to {scenario_file}")

# ### Check it

# In[ ]:

get_ipython().run_cell_magic(
    'time', '',
Exemplo n.º 8
0
import pandas as pd
from datetime import datetime, timedelta
from covid_xprize.validation.scenario_generator import get_raw_data, generate_scenario, NPI_COLUMNS

# Fixed start date. To start one week from today instead, derive start_date
# from datetime.now() + timedelta(days=7) and format it with '%Y-%m-%d'.
start_date_str = '2020-10-01'
start_date = pd.to_datetime(start_date_str, format='%Y-%m-%d')

# The scenario spans 180 days from the start date
end_date = start_date + timedelta(days=180)
end_date_str = end_date.strftime('%Y-%m-%d')
print(f"Start date: {start_date_str}")
print(f"End date: {end_date_str}")

DATA_FILE = '../data/OxCGRT_latest.csv'
scenario_file = "180_days_future_scenario.csv"

# Build a "Freeze" scenario for all countries over the window and save it
latest_df = get_raw_data(DATA_FILE, latest=False)
scenario_df = generate_scenario(
    start_date_str,
    end_date_str,
    latest_df,
    countries=None,
    scenario="Freeze",
)
scenario_df.to_csv(scenario_file, index=False)
print(f"Saved scenario to {scenario_file}")