Пример #1
0
def get_acause(model_version_id, connection):
    call = '''
    SELECT cause_id FROM cod.model_version
        WHERE model_version_id = {model_version_id}
    '''.format(model_version_id=model_version_id)
    cause_id = query(call, connection)["cause_id"][0]
    call = '''
    SELECT acause from shared.cause WHERE cause_id = {cause_id}
    '''.format(cause_id=cause_id)
    acause = query(call, connection)["acause"][0]
    return acause
Пример #2
0
def new_covariates(old_model_version_id, new_model_version_id, db_connection):
    '''
    (int, int) -> None

    Given two integers representing a previous model version ID used for CODEm
    and a new model, update the new model to use the newest version of the
    covariates by inputting those values in the production database.
    '''
    call = '''
    SELECT * FROM cod.model_covariate where model_version_id = {model}
    '''.format(model=old_model_version_id)
    df = db_connect.query(call, db_connection)

    df["model_version_id"] = new_model_version_id
    df["covariate_model_version_id"] = \
        df.covariate_model_version_id.map(lambda x: get_latest_covariate(x, db_connection))
    drop_cols = [
        "model_covariate_id", "date_inserted", "inserted_by", "last_updated",
        "last_updated_by", "last_updated_action"
    ]
    df.drop(drop_cols, axis=1, inplace=True)
    db_connect.write_df_to_sql(df,
                               db="cod",
                               table="model_covariate",
                               connection=db_connection,
                               creds=None)
Пример #3
0
def update_status(status, model_version_id, connection):
    '''
    :param status: int
        the target status, e.g. 0 for pending, 1 for success, 7 for failure
    :param model_version_id: int
        which model to update the status of
    :param connection: string
        the database (server) to connect to

    :return: None
    '''
    call = '''
    UPDATE cod.model_version
    SET status = {status}
    WHERE model_version_id = {mvid}
    '''.format(status=status, mvid=model_version_id)
    query(call, connection)
Пример #4
0
def get_acause(model_version_id, db_connection):
    """
    :param model_version_id: integer representing a codem model version id
    :return: cause

    Given a model version id returns the corresponding cause.
    """
    call = '''
    SELECT cause_id FROM cod.model_version
        WHERE model_version_id = {model_version_id}
    '''.format(model_version_id=model_version_id)
    cause_id = np.array(query(call, db_connection))[0, 0]
    call = '''
    SELECT acause FROM shared.cause WHERE cause_id = {cause_id}
    '''.format(cause_id=cause_id)
    acause = np.array(query(call, db_connection))[0, 0]
    return acause
Пример #5
0
def location_ids_for_covariate(covariate_model_version_id, db_connection):
    call = '''
    SELECT DISTINCT location_id FROM covariate.model
        WHERE model_version_id = {covariate_model_version_id}
        ORDER BY location_id
    '''.format(covariate_model_version_id=covariate_model_version_id)
    locations = np.array(query(call, db_connection))[:, 0]
    return locations
Пример #6
0
def covariate_id_to_short_name(covariate_model_version_id, db_connection):
    call = '''
    SELECT covariate_name_short FROM shared.covariate
        WHERE covariate_id IN (SELECT covariate_id from covariate.data_version WHERE data_version_id IN
            (SELECT data_version_id FROM covariate.model_version
                WHERE model_version_id = {covariate_model_version_id}))
    '''.format(covariate_model_version_id=covariate_model_version_id)
    covariate_short_name = np.array(query(call, db_connection))[0, 0]
    return covariate_short_name
Пример #7
0
def set_rerun_models(list_of_models, gbd_round_id, db_connection, desc=None):
    """
    set of codem models based on past model version id

    :param list_of_models: list like
        list like object of model version ids
    :param desc: str
        string to use for description name
    """
    models_string = "(" + ",".join([str(l) for l in set(list_of_models)]) + ")"
    branch = "master"
    drop_cols = [
        "model_version_id", "date_inserted", "inserted_by", "last_updated",
        "last_updated_by", "last_updated_action"
    ]
    null_cols = [
        "pv_rmse_in",
        "pv_rmse_out",
        "pv_coverage_in",
        "pv_coverage_out",
        "pv_trend_in",
        "pv_trend_out",
        "pv_psi",
        "best_start",
        "best_end",
    ]
    call = '''
    SELECT *
    FROM
    cod.model_version
    WHERE
    model_version_id IN {ms}
    '''.format(ms=models_string)
    df = db_connect.query(call, db_connection)
    df["description"] = desc
    df["code_version"] = '{"branch": "%s", "commit": "%s"}' % (
        branch, get_current_commit(branch))
    df[["status", "is_best"]] = 0
    df["previous_model_version_id"] = df["model_version_id"]
    df["location_set_version_id"] = current_location_set_id(
        gbd_round_id, db_connection)
    df["cause_set_version_id"] = current_cause_set_id(gbd_round_id,
                                                      db_connection)
    df["gbd_round_id"] = gbd_round_id
    df["locations_exclude"] = get_excluded_locations(df)
    # for right now replace oldest age group 21 with new oldest age group
    # we probably want to change this in the future
    df.loc[df.age_end == 21, "age_end"] = 235
    df[null_cols] = np.NaN
    df.drop(drop_cols, axis=1, inplace=True)
    models = db_connect.write_df_to_sql(df,
                                        db="cod",
                                        table="model_version",
                                        connection=db_connection,
                                        creds=None,
                                        return_key=True)
    return models
Пример #8
0
def cause_list(db_connection):
    """
    Pull the cause list from the database.
    """
    call = '''
        SELECT cause_id, cause_name FROM shared.cause;
    '''
    data = query(call, db_connection)
    data = data.rename(columns={"cause_name": "Cause"})
    return data
Пример #9
0
def age_list(db_connection):
    """
    Pull the age list from the database.
    """
    call = '''
        SELECT age_group_id, age_group_alternative_name as age_name
        FROM shared.age_group;
    '''
    data = query(call, db_connection)
    return data
Пример #10
0
def get_latest_covariate(model_version_id, acause, db_connection,
                         gbd_round_id):
    '''
    Given a covariate model version id find the best model version id for that
    covariate.

    :param model_version_id: int
        model version id to look up and create duplicate of
    :param gbd_round_id: int
        gbd round id of the CURRENT model run
    :return: int
        new model version id
    '''
    covariate_id = get_covariate_id(model_version_id)

    # really sloppy stuff for covariate deprecation
    if covariate_id == 1:
        covariate_id = 2
    if covariate_id == 41:
        if "nutrition" not in acause:
            covariate_id = 911
        else:
            covariate_id = 914
    if covariate_id == 78:
        if "nutrition" not in acause:
            covariate_id = 917
        else:
            covariate_id = 920
    if covariate_id == 103:
        covariate_id = 105
    if covariate_id == 104:
        covariate_id = 105
    if covariate_id == 136:
        if "nutrition" not in acause:
            covariate_id = 938
        else:
            covariate_id = 941
    if covariate_id == 153:
        if "nutrition" not in acause:
            covariate_id = 965
        else:
            covariate_id = 968
    if covariate_id in [9, 52, 81, 95, 150, 971]:
        covariate_id = None

    if covariate_id is not None:
        call = '''
            SELECT model_version_id FROM covariate.model_version
            WHERE data_version_id IN (SELECT data_version_id FROM covariate.data_version WHERE covariate_id={cid})
            AND is_best=1 AND gbd_round_id = {gbd}
            '''.format(cid=covariate_id, gbd=gbd_round_id)
        new_model = db_connect.query(call, 'DATABASE')["model_version_id"][0]
    else:
        new_model = np.nan
    return new_model
Пример #11
0
def get_codem_version(model_version_id, db_connection):
    '''
    (str) -> str, str

    Given a model version id gets the branch and commit of the code
    '''
    call = "SELECT code_version FROM cod.model_version WHERE model_version_id = {0}"
    result = json.loads(
        db_connect.query(call.format(model_version_id),
                         connection=db_connection)["code_version"][0])
    return result["branch"], result["commit"]
Пример #12
0
def show_model_status(model_version_id, db_conn="INSERT_DATABASE_NAME"):
    """
    Pulls the logs from the DB of a model run.
    """
    call = '''
    SELECT model_version_log_entry, date_inserted
    FROM cod.model_version_log WHERE model_version_id = {}
    ORDER BY date_inserted DESC
    '''.format(model_version_id)
    df = db_connect.query(call, db_conn)
    return df
Пример #13
0
def get_model_covariates(model_version_id, db_connection):
    """
    :param model_version_id:
    :return:
    """
    call = '''
    SELECT covariate_model_version_id FROM cod.model_covariate
        WHERE model_version_id = "{model_version_id}"
    '''.format(model_version_id=model_version_id)
    covariate_ids = np.array(query(call, db_connection))[:, 0]
    return covariate_ids
Пример #14
0
    def get_modeled_locations(self):
        """

        """
        loc_ids = tuple(self.df.location_id.unique())
        call = """
               SELECT location_name
               FROM shared.location
               WHERE location_id in {}
               """.format(loc_ids)
        loc_names = db_connect.query(call, self.db_connection).values.tolist()
        return pd.DataFrame({"id": loc_ids, "name": loc_names})
Пример #15
0
def get_submodels(model_version_id, db_connection):
    """
    :param model_version_id: integer representing a codem model version id
    :return: Pandas Data frame with submodels and corresponding information
    """
    call = '''
    SELECT submodel_version_id, rank, weight, submodel_type_id, submodel_dep_id
    FROM cod.submodel_version
    WHERE model_version_id = {model_version_id}
    '''.format(model_version_id=model_version_id)
    df = query(call, db_connection)
    return df
Пример #16
0
def get_modeler(acause, connection):
    '''
    string -> string

    Look up the modeler for the cause being modeled in the cod.modeler table
    '''
    call = """ SELECT m.username
        FROM cod.modeler m
            INNER JOIN shared.cause c
            ON m.cause_id = c.cause_id
        WHERE c.acause = '{acause}'
        """.format(acause=acause)
    modeler = query(call, connection)
    return modeler.ix[0, 0].split(', ')
Пример #17
0
def get_broken_models(db_connection):
    '''
    () -> 1d-array

    Gets all the runs of codem that that are on the production database that
    failed the run and returns their model version id numbers in an array.
    '''
    call = '''
    SELECT model_version_id FROM cod.model_version
        WHERE description = "2015 full run test 6/10"
        AND pv_rmse_in IS NULL;
    '''
    broken_models = np.array(query(call, db_connection))
    return sorted(broken_models[:, 0])
Пример #18
0
def all_submodel_covs(model_version_id, db_connection):
    """
    :param model_version_id: integer representing a codem model version id
    :return: Pandas Data frame with submodels, covariates, and corresponding information
    """
    submodels = get_submodels(model_version_id, db_connection)
    covs = pd.concat([submodel_covs(x) for x in submodels.submodel_version_id],
                     axis=0).reset_index(drop=True)
    df = covs.merge(submodels, how="left")
    df = df.sort(["rank", "covariate_name_short"])
    call = '''
    SELECT submodel_type_id, submodel_type_name FROM cod.submodel_type;
    '''
    df2 = query(call, db_connection)
    df = df.merge(df2, how="left")
    call = '''
    SELECT submodel_dep_id, submodel_dep_name FROM cod.submodel_dep;
    '''
    df2 = query(call, db_connection)
    df = df.merge(df2, how="left")
    df.drop(["submodel_type_id", "submodel_dep_id"], inplace=True, axis=1)
    df = df.sort(["rank", "covariate_name_short"])
    df["approximate_draws"] = np.round(df.weight.values * 1000.)
    return df
Пример #19
0
def current_cause_set_id(gbd_round_id, db_connection):
    """
    Looks up the latest 2015 cause set id for codem

    :return: int
        latest 2015 cause set id
    """
    call = '''
    SELECT cause_set_version_id FROM shared.cause_set_version_active
    WHERE cause_set_id = 4 # referes to the set of causes specific to codem
    AND gbd_round_id = {};
    '''.format(gbd_round_id)
    cause_set_version_id = \
        db_connect.query(call, db_connection)["cause_set_version_id"][0]
    return cause_set_version_id
Пример #20
0
def current_location_set_id(gbd_round_id, db_connection):
    """
    Looks up the latest 2015 location set id for codem

    :return: int
        latest 2015 location set id
    """
    call = '''
    SELECT location_set_version_id FROM shared.location_set_version_active
    WHERE location_set_id = 35 # this location set id refres specifically to the hierarchy that codem uses
    AND gbd_round_id = {};
    '''.format(gbd_round_id)
    location_set_version_id = \
        db_connect.query(call, db_connection)["location_set_version_id"][0]
    return location_set_version_id
Пример #21
0
def get_model_metadata(model_version_ids, db):
    model_list = ', '.join([str(model) for model in model_version_ids])

    call = '''
    SELECT mv.model_version_id, mv.model_version_type_id, mv.sex_id, mv.cause_id, sc.cause_name, mv.age_start, mv.age_end
    FROM cod.model_version mv
    INNER JOIN shared.cause sc
    ON mv.cause_id = sc.cause_id
    WHERE mv.model_version_id IN ({mvids})
    '''.format(mvids=model_list)

    print call

    df = db_connect.query(call, db)

    return df
Пример #22
0
def best_model_covs(acause, db_connection, year=2015):
    """

    :param acause: string representing a valid acause
    :return: Pandas Data frame with submodels, covariates, and corresponding information

    Given a string that represents a valid acause in the GBD heirarchy retruns the best
    models submodel covariates as well as metadata regarding those models.
    """
    call = '''
    SELECT model_version_id FROM cod.model_version
        WHERE cause_id=(SELECT cause_id FROM shared.cause WHERE acause="{acause}")
        AND date_inserted > '{year}-01-01' ORDER BY pv_rmse_out limit 1
    '''.format(acause=acause, year=year)
    model_version_id = np.array(query(call, db_connection))[0, 0]
    return all_submodel_covs(model_version_id, db_connection)
Пример #23
0
def get_broken_models(db_connection, description="relaunch new CoD data"):
    """
    () -> 1d-array

    Gets all the runs of codem that that are on the production database that
    failed the 2015 run and returns their model version id numbers in an array.
    """
    call = '''
    SELECT model_version_id FROM cod.model_version
        WHERE
        (description = "{description}" OR
        description LIKE "%%{description}:%%")
        AND pv_rmse_in IS NULL;
    '''.format(description=description)
    broken_models = np.array(query(call, db_connection))
    return sorted(broken_models[:, 0])
Пример #24
0
def check_model_count(model_version_id, db_connection, broken=False):
    '''
    (int) -> int

    Returns the numne
    '''
    base = "PATH_TO_CV_SELECTED"
    if broken:
        number = len(import_json(base)["cf_vars"]) + len(
            import_json(base)["rate_vars"]) * 2
    else:
        call = '''
        SELECT submodel_version_id FROM cod.submodel_version WHERE model_version_id={model_version_id};
        '''.format(model_version_id=model_version_id)
        df = query(call, db_connection)
        number = df.shape[0]
    return number
Пример #25
0
def get_covariate_id(model_version_id):
    """Given a covariate model version ID find the covariate ID."""
    call = '''
    SELECT DISTINCT
        sc.covariate_id, sc.covariate_name
    FROM
        covariate.model_version cmv
            INNER JOIN
        covariate.data_version cdv ON cdv.data_version_id = cmv.data_version_id
            INNER JOIN
        shared.covariate sc ON sc.covariate_id = cdv.covariate_id
    WHERE
        cmv.model_version_id = {mvid}
    '''.format(mvid=model_version_id)
    covariate_id = db_connect.query(call, 'DATABASE')["covariate_id"][0]

    return covariate_id
Пример #26
0
def get_acause(model_version_id, db_connection):
    """
    Get acause from CODEm model version ID
    :param model_version_id:
    :return: str with acause
    """
    call = '''
    SELECT
        sc.acause AS acause
    FROM
        shared.cause sc
            INNER JOIN
        cod.model_version mv ON sc.cause_id = mv.cause_id
    WHERE
        mv.model_version_id = {mvid}
    '''.format(mvid=model_version_id)
    acause = db_connect.query(call, db_connection)["acause"][0]
    return acause
Пример #27
0
def set_new_covariates(models, db_connection):
    """
    Sets the covariates for all the new models using their prior selected covariates

    :param models: list of int
        list of models to add covariates for
    :param db_connection: str
        db to connect to
    """
    model_str = ', '.join([str(x) for x in models])
    call = '''
    SELECT model_version_id, previous_model_version_id
    FROM cod.model_version
    WHERE model_version_id IN ({model_str});
    '''.format(model_str=model_str)
    df = db_connect.query(call, db_connection)
    for i in range(df.shape[0]):
        new_covariates(df["previous_model_version_id"][i],
                       df["model_version_id"][i], db_connection)
Пример #28
0
def submodel_covs(submodel_version_id, db_connection):
    """
    :param submodel_version_id: integer representing a codem submodel version id
    :return: Pandas data frame with information on submodel covariates

    Given a submodel version id returns the covariates that were used in the
    construction of that model.
    """
    call = '''
    SELECT covariate_name_short FROM shared.covariate
        WHERE covariate_id IN (SELECT covariate_id from covariate.data_version WHERE data_version_id IN
            (SELECT data_version_id FROM covariate.model_version
                WHERE model_version_id IN
                (SELECT covariate_model_version_id FROM cod.submodel_version_covariate
                    WHERE submodel_version_id={submodel_version_id})))
    '''.format(submodel_version_id=submodel_version_id)
    df = query(call, db_connection)
    df["submodel_version_id"] = submodel_version_id
    return df
Пример #29
0
def remove_covariates(df, covariates, db_connection):
    '''
    (df, list) -> df

    Removes the rows containing the input covariates from the input df. The
    covariates in the df are stored as covariate model version ids, and the
    input covariates should be in the covariate_name_short form
    '''
    call = '''SELECT mv.model_version_id
        FROM covariate.model_version mv
        INNER JOIN covariate.data_version dv
        ON mv.data_version_id = dv.data_version_id
        INNER JOIN shared.covariate sc
        ON dv.covariate_id = sc.covariate_id
        WHERE sc.covariate_name_short = "{}"'''
    for covariate in covariates:
        cov_mvids = db_connect.query(call.format(covariate), db_connection)
        cov_mvids = cov_mvids.model_version_id.values
        df = df[~df.covariate_model_version_id.isin(cov_mvids)]
    return df
Пример #30
0
def get_latest_covariate(model_version_id, db_connection):
    '''
    Given a covariate model version id find the best model version id for that
    covariate.

    :param model_version_id: int
        model version id to look up and create duplicate of
    :return: int
        new model version id
    '''
    call = '''
    SELECT model_version_id FROM covariate.model_version
    WHERE data_version_id IN (SELECT data_version_id FROM covariate.data_version
    WHERE covariate_id=(SELECT covariate_id FROM covariate.data_version
                        WHERE data_version_id =(SELECT data_version_id FROM
                                                covariate.model_version
                                                WHERE model_version_id={model})))
                                                AND is_best=1;
    '''.format(model=model_version_id)
    new_model = db_connect.query(call, db_connection)["model_version_id"][0]
    return new_model