예제 #1
0
def query_project(conn, project_name, query_type, query):
    """
    Submit a forecast or truth query to a Zoltar project and return the
    results as a pandas data frame.

    :param conn: a ZoltarConnection
    :param project_name: name of the Project to query
    :param query_type: a QueryType enum value indicating the type of query to run
    :param query: a dict that constrains the queried data. It is the analog of the JSON object documented at
        https://docs.zoltardata.com/ . Briefly, query is a dict whose keys vary depending on query_type. References
        to models, units, targets, and timezeros are strings that name the objects, and not IDs. Following are
        examples of forecast and truth queries:
    Forecasts:
        {"models": ["60-contact", "CovidIL_100"],
          "units": ["US"],
          "targets": ["0 day ahead cum death", "1 day ahead cum death"],
          "timezeros": ["2020-05-14", "2020-05-09"],
          "types": ["point", "quantile"]}
    Truth:
        {"units": ["US"],
          "targets": ["0 day ahead cum death", "1 day ahead cum death"],
          "timezeros": ["2020-05-14", "2020-05-09"]}
    :return: a pandas data frame of query results. The columns depend on the originating query and are taken
        from the first downloaded row; remaining rows become the data and are labelled 0..n-1. If the download
        contains no rows at all, an empty data frame is returned.
    :raises RuntimeError: if conn exposes no project named project_name
    """
    # identify project to query from project_name
    conn.re_authenticate_if_necessary()
    projects = conn.projects
    matching_projects = [project for project in projects if project.name == project_name]
    if not matching_projects:
        raise RuntimeError(f"found no project named '{project_name}' in {projects}")

    project = matching_projects[0]

    # submit the query and block until the job has finished
    job = project.submit_query(query_type, query)
    util.busy_poll_job(job)

    # results arrive as rows of a csv: the header row first, then data rows
    csv_rows = job.download_data()
    if not csv_rows:
        # not even a header row: nothing to name columns after
        return pd.DataFrame()

    # Build the frame in one pass. Creating one single-row frame per csv row
    # and concatenating them is much slower and gives every row index label 0.
    header, *data_rows = csv_rows
    return pd.DataFrame(data_rows, columns=header)
def _is_existing_forecast_error(err):
    """Return True if err carries a status-400 response whose JSON "error" starts with "A forecast already exists"."""
    if len(err.args) < 2:
        return False
    response = err.args[1]
    if getattr(response, "status_code", None) != 400:
        return False
    try:
        message = json.loads(response.text)["error"]
    except (AttributeError, KeyError, TypeError, ValueError):
        # body is missing, not JSON, or has no "error" entry: not the case we retry
        return False
    return str(message).startswith("A forecast already exists")


def upload_covid_forecast_by_model(conn,
                                   json_io_dict,
                                   forecast_filename,
                                   project_name,
                                   model,
                                   model_abbr,
                                   timezero_date,
                                   notes='',
                                   overwrite=False,
                                   sync=True):
    """
    Upload one forecast to a model, replacing a conflicting forecast if needed.

    The upload is attempted at most twice. When an attempt fails because a
    forecast already exists, that forecast is deleted via util.delete_forecast
    and the upload is retried. Any other RuntimeError is re-raised immediately
    instead of being retried.

    :param conn: connection object; re_authenticate_if_necessary() is called on it first
    :param json_io_dict: the forecast as a dict, or a str path to a JSON file holding it
    :param forecast_filename: filename passed to model.upload_forecast
    :param project_name: project name passed to util.delete_forecast
    :param model: object whose upload_forecast() performs the upload
    :param model_abbr: model abbreviation passed to util.delete_forecast
    :param timezero_date: timezero of the forecast
    :param notes: free-text notes passed to model.upload_forecast
    :param overwrite: if true, delete the existing forecast before uploading
    :param sync: if true, return util.busy_poll_job(job); otherwise return the job itself
    :return: see sync; None if both attempts hit the "already exists" error
    :raises RuntimeError: for any upload error other than "A forecast already exists"
    :raises SystemExit: (code 1) if json_io_dict is a path that cannot be read or parsed as JSON
    """
    conn.re_authenticate_if_necessary()
    if overwrite:
        print(
            f"Existing forecast({forecast_filename}) present. Deleting it on Zoltar to upload latest one"
        )
        util.delete_forecast(conn, project_name, model_abbr, timezero_date)

    # check json formatting before upload
    # accepts either string or dictionary
    if isinstance(json_io_dict, str):
        try:
            with open(json_io_dict) as jsonfile:
                json_io_dict = json.load(jsonfile)
        except (OSError, ValueError):
            # OSError: unreadable file; ValueError: invalid JSON or undecodable text.
            # Narrower than a bare except so Ctrl-C and SystemExit are not swallowed.
            print("""\nERROR - cannot read JSON Format. 
            Uploading a CSV? Consider converting to quantile csv style with:
            quantile_json, error_from_transformation = quantile_io.json_io_dict_from_quantile_csv_file(...)"""
                  )
            sys.exit(1)

    # Runs only twice
    for _attempt in range(2):
        try:
            job = model.upload_forecast(json_io_dict, forecast_filename,
                                        timezero_date, notes)
            if sync:
                return util.busy_poll_job(job)
            return job
        except RuntimeError as err:
            print(
                f"RuntimeError occured while uploading forecast. Error: {err}")
            if not _is_existing_forecast_error(err):
                # Not recoverable by deleting and retrying. Looping again here
                # would repeat the same failing upload forever.
                raise
            # now we are sure it is the existing forecast error, delete the one on zoltar and then try again.
            print(
                f"This forecast({model_abbr}) with timezero ({timezero_date}) is already present, deleting forecast on Zoltar and then retrying..."
            )
            util.delete_forecast(conn, project_name, model_abbr,
                                 timezero_date)
            print("Deleted on Zoltar. Retrying now.")

    # both attempts hit the "already exists" error
    return None
def upload_covid_forecast_by_model(conn, json_io_dict, forecast_filename, project_name, model, model_abbr, timezero_date, notes='',
                    overwrite=False, sync=True):
    """
    Upload one forecast to a model, optionally deleting the existing one first.

    :param conn: connection object; re_authenticate_if_necessary() is called on it first
    :param json_io_dict: the forecast as a dict, or a str path to a JSON file holding it
    :param forecast_filename: filename passed to model.upload_forecast
    :param project_name: project name passed to util.delete_forecast when overwrite is true
    :param model: object whose upload_forecast() performs the upload
    :param model_abbr: model abbreviation passed to util.delete_forecast when overwrite is true
    :param timezero_date: timezero of the forecast
    :param notes: free-text notes passed to model.upload_forecast
    :param overwrite: if true, call util.delete_forecast before uploading
    :param sync: if true, return util.busy_poll_job(job); otherwise return the job itself
    :return: see sync
    :raises SystemExit: (code 1) if json_io_dict is a path that cannot be read or parsed as JSON
    """
    conn.re_authenticate_if_necessary()
    if overwrite:
        util.delete_forecast(conn, project_name, model_abbr, timezero_date)

    # check json formatting before upload
    # accepts either string or dictionary
    if isinstance(json_io_dict, str):
        try:
            with open(json_io_dict) as jsonfile:
                json_io_dict = json.load(jsonfile)
        except (OSError, ValueError):
            # OSError: unreadable file; ValueError: invalid JSON or undecodable text.
            # Narrower than a bare except so Ctrl-C and SystemExit are not swallowed.
            print("""\nERROR - cannot read JSON Format. 
            Uploading a CSV? Consider converting to quantile csv style with:
            quantile_json, error_from_transformation = quantile_io.json_io_dict_from_quantile_csv_file(...)""")
            sys.exit(1)

    job = model.upload_forecast(json_io_dict, forecast_filename, timezero_date, notes)
    if sync:
        return util.busy_poll_job(job)
    return job
from zoltpy import util
from zoltpy.connection import ZoltarConnection
import os
import sys

# Location of the Zoltar truth CSV (relative to the current working directory).
# Change this if needed.
path_to_zoltar_truth = './data-truth/zoltar-truth.csv'

# Project metadata and the connection returned by util.authenticate().
# NOTE: the connection and the project lookup below run at import time.
project_name = 'COVID-19 Forecasts'
project_obj = None
conn = util.authenticate()
url = 'https://github.com/midas-network/covid19-scenario-modeling-hub/tree/master/data-processed/'

# Select the first project whose name matches project_name.
# Raises IndexError when the connection exposes no such project.
project_obj = list(
    filter(lambda candidate: candidate.name == project_name, conn.projects)
)[0]

# Example Run: python3 ./code/zoltar_scripts/upload_truth_to_zoltar.py
if __name__ == '__main__':
    # Hand the open truth file to the project, then wait for the upload job.
    with open(path_to_zoltar_truth) as truth_file:
        upload_file_job = project_obj.upload_truth_data(truth_file)
    util.busy_poll_job(upload_file_job)
    print("- upload truth done")
예제 #5
0
def zoltar_connection_app():
    """
    Demo application that exercises the library at the ZoltarConnection level
    (as opposed to the package's higher-level helpers such as delete_forecast()).

    It first runs read-only operations against 'Docs Example Project', then
    creates a sandbox project ('My project'), uploads truth and a forecast to
    it, and finally deletes the forecast and the sandbox project again.

    - App args: None
    - Required environment variables:
      - 'Z_HOST': Zoltar host to connect to. typically "https://www.zoltardata.com"
      - 'Z_USERNAME': username of the account that has permission to access the resources used below
      - 'Z_PASSWORD': password for that account
    """

    def first_named(candidates, wanted_name):
        # First item whose .name equals wanted_name; IndexError if none matches.
        return [item for item in candidates if item.name == wanted_name][0]

    def run_query(target_project, query_type, constraints):
        # Submit the query, wait for the job (busy_poll_job refreshes it), then fetch the rows.
        query_job = target_project.submit_query(query_type, constraints)
        busy_poll_job(query_job)  # does refresh()
        return query_job.download_data()

    z_host = os.environ.get('Z_HOST')
    z_username = os.environ.get('Z_USERNAME')
    z_password = os.environ.get('Z_PASSWORD')

    #
    # part 1: non-destructive functions
    #

    # connection level: authenticate and list every visible project
    conn = ZoltarConnection(z_host)
    conn.authenticate(z_username, z_password)
    print('\n* projects')
    for listed in conn.projects:
        print(f'- {listed}, {listed.id}, {listed.name}')

    # project level
    docs_project = first_named(conn.projects, 'Docs Example Project')
    print(f'\n* working with {docs_project}')
    print(f"- objects in {docs_project}:\n"
          f"  = units: {docs_project.units}\n"
          f"  = targets: {docs_project.targets}\n"
          f"  = timezeros: {docs_project.timezeros}\n"
          f"  = models: {docs_project.models}")

    # truth detail of the project
    print(f'\n* truth for {docs_project}')
    print(f'- source, created_at: {docs_project.truth_source}, {docs_project.truth_created_at}')

    # latest forecasts of the project
    print(f'\n* latests forecasts for {docs_project}')
    print(f'- source, created_at: {docs_project.latest_forecasts}')

    # model level
    docs_model = first_named(docs_project.models, 'docs forecast model')
    print(f'\n* working with {docs_model}')
    print(f'- forecasts: {docs_model.forecasts}')

    # forecast level
    first_forecast = docs_model.forecasts[0]
    print(f'\n* working with {first_forecast}')

    forecast_data = first_forecast.data()
    print(f"- data: {len(forecast_data['predictions'])} predictions")  # 26 predictions

    # convert a cdc csv file
    cdc_csv_file = "tests/EW01-2011-ReichLab_kde_US_National.csv"
    print(f'\n* working with a cdc csv file: {cdc_csv_file}')
    with open(cdc_csv_file) as cdc_fp:
        cdc_json = json_io_dict_from_cdc_csv_file(2011, cdc_fp)
    print(f"- converted cdc data to json: {len(cdc_json['predictions'])} predictions")  # 154 predictions

    # convert a quantile csv file
    quantile_csv_file = "tests/quantile-predictions.csv"
    print(f'\n* working with a quantile csv file: {quantile_csv_file}')
    quantile_targets = ['1 wk ahead cum death', '1 day ahead inc hosp']
    with open(quantile_csv_file) as quantile_fp:
        quantile_json, error_messages = json_io_dict_from_quantile_csv_file(quantile_fp, quantile_targets)
    print(f"- converted quantile data to json: {len(quantile_json['predictions'])} predictions")  # 5 predictions

    # convert the downloaded forecast to a Pandas DataFrame
    print('\n* working with a pandas data frame')
    forecast_frame = dataframe_from_json_io_dict(forecast_data)
    print(f'- dataframe: {forecast_frame}')

    # query forecast data
    print("\n* querying forecast data")
    forecast_rows = run_query(
        docs_project,
        QueryType.FORECASTS,
        {
            'targets': ['pct next week', 'cases next week'],
            'types': ['point'],
        },
    )
    print(f"- got {len(forecast_rows)} forecast rows. as a dataframe:")
    print(dataframe_from_rows(forecast_rows))

    # query truth data
    print("\n* querying truth data")
    truth_rows = run_query(
        docs_project,
        QueryType.TRUTH,
        {'targets': ['pct next week', 'cases next week']},
    )
    print(f"- got {len(truth_rows)} truth rows. as a dataframe:")
    print(dataframe_from_rows(truth_rows))

    #
    # part 2: destructive functions
    #

    # start from a clean sandbox: remove a leftover 'My project' (docs-project.json) if there is one
    leftovers = [candidate for candidate in conn.projects if candidate.name == 'My project']
    stale_project = leftovers[0] if leftovers else None
    if stale_project:
        print(f"\n* deleting project {stale_project}")
        stale_project.delete()
        print("- deleted project")

    print("\n* creating project")
    sandbox = create_project(conn, "examples/docs-project.json")  # "name": "My project"
    print(f"- created project: {sandbox}")

    # upload truth
    print("\n* uploading truth")
    with open('tests/docs-ground-truth.csv') as truth_fp:
        truth_job = sandbox.upload_truth_data(truth_fp)
    busy_poll_job(truth_job)
    print("- upload truth done")

    # create a model, upload a forecast, then delete it
    print("\n* creating model")
    with open("examples/example-model-config.json") as config_fp:
        sandbox_model = sandbox.create_model(json.load(config_fp))
    print(f"- created model: {sandbox_model}")

    print(f"\n* uploading forecast. pre-upload forecasts: {sandbox_model.forecasts}")
    with open("examples/docs-predictions.json") as predictions_fp:
        predictions = json.load(predictions_fp)
        upload_job = sandbox_model.upload_forecast(
            predictions, "docs-predictions.json", "2011-10-02", "some predictions")
    busy_poll_job(upload_job)
    new_forecast = upload_job.created_forecast()
    print(f"- uploaded forecast: {new_forecast}")

    # refresh so the model's forecast list reflects the upload
    sandbox_model.refresh()
    print(f'\n* post-upload forecasts: {sandbox_model.forecasts}')

    print(f"\n* deleting forecast: {new_forecast}")
    delete_job = new_forecast.delete()
    busy_poll_job(delete_job)
    print("- deleting forecast: done")

    # clean up by deleting the sandbox project. NB: This will delete all of the data associated with the project
    # without warning, including models and forecasts
    print(f"\n* deleting project {sandbox}")
    sandbox.delete()
    print("- deleted project")

    print("\n* app done!")