Exemplo n.º 1
0
def validate_decomp_step(obj_name, decomp_step, gbd_round_id):
    """Validate ``decomp_step`` for ``obj_name`` under EPI save_results rules.

    The step is resolved to its decomp step ID for ``gbd_round_id``, a
    ``RulesManager`` is built for the EPI research area and the
    SAVE_RESULTS tool with that ID, and the actual check is delegated to
    ``validate_step_viewable``. Nothing is returned; any failure surfaces
    as whatever the helpers raise.
    """
    step_id = decomp_step_id_from_decomp_step(decomp_step, gbd_round_id)
    research_area = ResearchAreas.EPI
    tool = Tools.SAVE_RESULTS
    rules = RulesManager(research_area, tool, decomp_step_id=step_id)
    validate_step_viewable(obj_name, decomp_step, rules)
Exemplo n.º 2
0
    def __init__(
        self,
        decomp_step: str = None,
        gbd_round_id: int = gbd.GBD_ROUND_ID,
        include_como: bool = False):
        """Validate the decomp step, then load maps and build both graphs.

        Args:
            decomp_step: decomp step name; validated against the EPI
                research area / SAVE_RESULTS tool rules before anything
                else is set up.
            gbd_round_id: GBD round the step belongs to.
            include_como: stored on the instance as ``include_como``.
        """

        # Fail fast: nothing is initialised if the step is not valid.
        _validate_decomp_step(
            decomp_step,
            gbd_round_id,
            ResearchAreas.EPI,
            Tools.SAVE_RESULTS,
            mark_best=True
        )

        self.include_como = include_como
        self._loaded_maps = {}
        self.all_processes = {}
        self.decomp_step = decomp_step
        self.gbd_round_id = gbd_round_id
        # Numeric ID matching the (decomp_step, gbd_round_id) pair.
        self.decomp_step_id = decomp_step_id_from_decomp_step(
            step=self.decomp_step,
            gbd_round_id=self.gbd_round_id
        )
        self.G = nx.DiGraph() # meid graph
        self.P = nx.DiGraph() # process graph
        self.start_node = Params.EPIC_START_NODE
        # Maps and graphs are loaded relative to the directory of this file.
        self.this_file = os.path.realpath(__file__)
        self.this_dir = os.path.dirname(self.this_file)
        self.load_maps(self.this_dir)
        self.build_graphs(self.this_dir)
        # NOTE(review): these look like lazily-populated caches; confirm
        # against the properties/methods that read them.
        self._best_models = None
        self._inputs = None
        self._outputs = None
Exemplo n.º 3
0
def get_regional_scalars(
    gbd_round_id: int,
    decomp_step: str,
) -> pd.DataFrame:
    """
    Fetch population scalar estimates as location/year scaling factors.

    Historical note: earlier iterations of scalars were sex- and
    age-specific. Starting GBD2017 they are produced with
    location/year-specific detail, at the both-sex and all-age level.

    The estimates come from the run selected by the subquery (process_id
    23, marked best, matching the GBD round, decomp step and output
    years). The ``mean`` column is returned under the name
    ``scaling_factor``.

    Returns:
        DataFrame with columns ``location_id``, ``year_id`` and
        ``scaling_factor``.
    """
    decomp_step_id = gbd_decomp_step.decomp_step_id_from_decomp_step(
        decomp_step, gbd_round_id)
    bind_values = {
        "gbd_round_id": gbd_round_id,
        "decomp_step_id": decomp_step_id,
        "year_ids": mmr_constants.OUTPUT_YEARS,
    }
    sql = """
        SELECT * FROM mortality.upload_population_scalar_estimate 
        WHERE run_id = (
            SELECT run_id
            FROM mortality.vw_decomp_process_version
            WHERE process_id = 23
            AND gbd_round_id = :gbd_round_id
            AND decomp_step_id = :decomp_step_id
            AND year_id IN :year_ids
            AND is_best = 1)
        """
    estimates = ezfuncs.query(
        sql, parameters=bind_values, conn_def='mortality')
    output_columns = ['location_id', 'year_id', 'scaling_factor']
    return estimates.rename(columns={'mean': 'scaling_factor'})[output_columns]
Exemplo n.º 4
0
    def new_mvid_list(self):
        """Build ``self.mvid_list`` from the best epi model versions.

        Queries best model versions for this GBD round at either the
        instance's decomp step or the iterative step, then keeps:

        * every row at the instance's decomp step, and
        * rows for modelable entities that have no version at that step,
          whose cause is in ``all_ntd_cause`` or ``new_gbd_2019_cause``
          and which are not listed in ``ntd_decomp_me``.

        The result (columns ``modelable_entity_id``, ``model_version_id``
        and ``decomp_step_id``) is stored on ``self.mvid_list``.
        """
        best_models_sql = """
            SELECT
                cause.cause_id, mv.modelable_entity_id,
                mv.model_version_id, mv.decomp_step_id
            FROM epi.model_version mv
            LEFT JOIN epi.modelable_entity_cause cause
                ON mv.modelable_entity_id = cause.modelable_entity_id
            WHERE
                mv.model_version_status_id = :best
                and mv.gbd_round_id = :gbd_round_id
                and mv.decomp_step_id IN (:decomp_step_id, :iterative)
            """
        candidates = ezfuncs.query(
            best_models_sql,
            conn_def="epi",
            parameters={
                "best": 1,
                "gbd_round_id": self.gbd_round_id,
                "decomp_step_id": self.decomp_step_id,
                "iterative": decomp_step_id_from_decomp_step(
                    step="iterative", gbd_round_id=self.gbd_round_id),
            })

        # Determine which decomp step to find model version in
        all_ntd_cause = [
            346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358,
            359, 360, 361, 362, 363, 364, 365, 405, 843, 935, 936
        ]
        ntd_decomp_me = [
            1500, 1503, 10402, 1513, 1514, 1515, 2999, 3109, 20265, 1516, 1517,
            1518, 3001, 3110, 20266, 1519, 1520, 1521, 3000, 3139, 3111, 20009,
            2797, 1474, 1469, 2965, 1475, 10524, 10525, 1476, 2966, 1470, 1471,
            10480, 1477, 10537, 1472, 1468, 1466, 1473, 1465, 16393, 1478
        ]
        new_gbd_2019_cause = [
            1004, 1005, 1006, 1007, 1008, 1009, 1010, 1011, 1012, 1013, 1014,
            1015, 1016, 1017, 628
        ]
        iterative_eligible_causes = all_ntd_cause + new_gbd_2019_cause

        # Rows already available at the requested decomp step.
        at_requested_step = candidates.decomp_step_id == self.decomp_step_id
        has_decomp_version = (
            candidates.loc[at_requested_step, 'modelable_entity_id']
            .unique()
            .tolist())

        # Entities allowed to fall back to their other (iterative) version.
        fallback_rows = (
            ~candidates.modelable_entity_id.isin(ntd_decomp_me)
            & candidates.cause_id.isin(iterative_eligible_causes)
            & ~candidates.modelable_entity_id.isin(has_decomp_version))
        use_iterative = (
            candidates.loc[fallback_rows, 'modelable_entity_id']
            .unique()
            .tolist())

        keep_rows = (
            at_requested_step
            | candidates.modelable_entity_id.isin(use_iterative))
        selected = candidates.loc[keep_rows]
        self.mvid_list = selected[[
            'modelable_entity_id', 'model_version_id', 'decomp_step_id'
        ]]
Exemplo n.º 5
0
    def new(cls,
            year_ids: List[int],
            location_set_ids: List[int],
            sex_ids: List[int] = [gbd.sex.MALE, gbd.sex.FEMALE],
            measure_ids: List[int] = [gbd.measures.DEATH, gbd.measures.YLL],
            gbd_round_id: int = gbd.GBD_ROUND_ID,
            decomp_step: str = gbd.decomp_step.ONE,
            n_draws: int = constants.Draws.N_DRAWS,
            process: str = GBD_PROCESS,
            databases: List[str] = [constants.DataBases.GBD],
            year_start_ids: Optional[List[int]] = None,
            year_end_ids: Optional[List[int]] = None):
        """Create a new version, populate its parameters and return it.

        A fresh version id is obtained from ``cls.get_new_version_id()``
        and used to instantiate ``cls``. The run arguments are stored on
        the instance, the envelope/population/life-table parameters are
        created, the version row and the GBD process version are created,
        and finally the cause, location, model-version and eligible
        metadata parameters are built.

        Args:
            year_ids: years stored on the instance as ``year_ids``.
            location_set_ids: location sets stored as ``location_set_ids``.
            sex_ids: sexes for the run.
            measure_ids: measures for the run.
            gbd_round_id: GBD round of the run.
            decomp_step: decomp step name; also resolved to
                ``decomp_step_id``.
            n_draws: number of draws.
            process: process name passed to
                ``_create_process_version_info``.
            databases: databases stored as ``databases``.
            year_start_ids: optional start years.
            year_end_ids: optional end years.

        Returns:
            The newly created and populated instance.
        """

        def _own_list(values):
            # The list defaults in the signature are built once at
            # definition time. Storing them directly would make every
            # instance share (and be able to mutate) the same list object.
            return None if values is None else list(values)

        version_id: int = cls.get_new_version_id()
        inst = cls(version_id)

        inst.year_ids: Optional[List[int]] = year_ids
        inst.year_start_ids: Optional[List[int]] = year_start_ids
        inst.year_end_ids: Optional[List[int]] = year_end_ids
        inst.location_set_ids: Optional[List[int]] = location_set_ids
        inst.sex_ids: Optional[List[int]] = _own_list(sex_ids)
        inst.measure_ids: Optional[List[int]] = _own_list(measure_ids)
        inst.gbd_round_id: Optional[int] = gbd_round_id
        inst.decomp_step: Optional[str] = decomp_step
        inst.decomp_step_id: Optional[int] = decomp_step_id_from_decomp_step(
            step=decomp_step, gbd_round_id=gbd_round_id)
        inst.n_draws: Optional[int] = n_draws
        inst.process: Optional[str] = process
        # scalar_version_id will be retrieved for FauxCorrect AND CoDCorrect
        # runs but will only be used in FauxCorrect runs
        inst.scalar_version_id: Optional[int] = (
            fauxcorrect_version.get_scalar_version_id(gbd_round_id))
        inst.databases: Optional[List[str]] = _own_list(databases)
        # Need to instantiate envelope, population, and life table parameter
        # objs before creating the process version row.
        inst._create_envelope_parameter()
        inst._create_population_parameter()
        inst._create_life_table_parameter()
        metadata, version_note = inst._create_process_version_info(process)
        # Create version row in the appropriate table for the run
        inst.create_new_version_row(version_id, inst.DESCRIPTION,
                                    inst.decomp_step_id, inst.gbd_round_id)
        # Create gbd process version id for the run
        inst.gbd_process_version_id = (inst.create_gbd_process_version(
            metadata, version_note))
        # Create parameter objects
        inst._create_cause_parameters()
        inst._create_location_parameters()
        inst._create_model_version_parameter()
        # Create eligible metadata
        inst._create_eligible_metadata()

        return inst
Exemplo n.º 6
0
 def __init__(self,
              conn_def,
              codcorrect_vers=None,
              decomp_step=None,
              gbd_round_id=None):
     """Store connection and version settings on the instance.

     Args:
         conn_def: connection definition, stored as ``conn_def``.
         codcorrect_vers: optional version, stored as ``codcorrect_vers``.
         decomp_step: optional decomp step name. When truthy it is
             resolved, together with ``gbd_round_id``, to
             ``decomp_step_id``.
         gbd_round_id: optional GBD round id.
     """
     self.conn_def = conn_def
     self.codcorrect_vers = codcorrect_vers
     self.decomp_step = decomp_step
     self.gbd_round_id = gbd_round_id
     # Always define the attribute so that reading it on an instance built
     # without a decomp step yields None instead of raising AttributeError.
     self.decomp_step_id = None
     if decomp_step:
         self.decomp_step_id = decomp_step_id_from_decomp_step(
             self.decomp_step, self.gbd_round_id)
Exemplo n.º 7
0
    def get_best_models(self) -> pd.DataFrame:
        """Return the best epi model versions to use for this run.

        Queries best model versions for this GBD round at either the
        instance's decomp step or the iterative step, then keeps:

        * every row at the instance's decomp step, and
        * rows for modelable entities that have no version at that step,
          whose cause is in ``all_ntd_cause`` or ``new_gbd_2019_cause``
          and which are not listed in ``ntd_decomp_me``.

        Returns:
            DataFrame with the queried columns (``cause_id``,
            ``modelable_entity_id``, ``model_version_id``,
            ``decomp_step_id``, ``best_start``) plus a ``decomp_step``
            column derived from ``decomp_step_id``.
        """
        mvid_list = ezfuncs.query(
            """
            SELECT
                cause.cause_id, mv.modelable_entity_id,
                mv.model_version_id, mv.decomp_step_id, mv.best_start
            FROM epi.model_version mv
            LEFT JOIN epi.modelable_entity_cause cause
                ON mv.modelable_entity_id = cause.modelable_entity_id
            WHERE
                mv.model_version_status_id = :best
                and mv.gbd_round_id = :gbd_round_id
                and mv.decomp_step_id IN (:decomp_step_id, :iterative)
            """,
            conn_def="epi",
            parameters={
                "best": 1,
                "gbd_round_id": self.gbd_round_id,
                "decomp_step_id": self.decomp_step_id,
                "iterative": decomp_step_id_from_decomp_step(
                    step=gbd.decomp_step.ITERATIVE,
                    gbd_round_id=self.gbd_round_id
                )
            }
        )

        # Determine what version (decomp/iterative) to use:
        all_ntd_cause = [346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356,
                         357, 358, 359, 360, 361, 362, 363, 364, 365, 405, 843,
                         935, 936]
        ntd_decomp_me = [1500, 1503, 10402, 1513, 1514, 1515, 2999, 3109,
                         20265, 1516, 1517, 1518, 3001, 3110, 20266, 1519,
                         1520, 1521, 3000, 3139, 3111, 20009, 2797, 1474,
                         1469, 2965, 1475, 10524, 10525, 1476, 2966, 1470,
                         1471, 10480, 1477, 10537, 1472, 1468, 1466, 1473,
                         1465, 16393, 1478]
        new_gbd_2019_cause = [1004, 1005, 1006, 1007, 1008, 1009, 1010, 1011,
                              1012, 1013, 1014, 1015, 1016, 1017]
        # Entities that already have a best version at the requested step.
        has_decomp_version = mvid_list.loc[
            mvid_list.decomp_step_id == self.decomp_step_id,
            ].modelable_entity_id.unique().tolist()
        # Entities allowed to fall back to their other (iterative) version.
        use_iterative = mvid_list.loc[
            ~mvid_list.modelable_entity_id.isin(ntd_decomp_me)
            & mvid_list.cause_id.isin(all_ntd_cause + new_gbd_2019_cause)
            & ~mvid_list.modelable_entity_id.isin(has_decomp_version)
            ].modelable_entity_id.unique().tolist()
        # Take an explicit copy: a column is added below, and assigning into
        # a filtered slice triggers pandas' SettingWithCopyWarning.
        mvid_list = mvid_list.loc[
            (mvid_list.decomp_step_id == self.decomp_step_id) |
            (mvid_list.modelable_entity_id.isin(use_iterative))].copy()
        mvid_list["decomp_step"] = mvid_list["decomp_step_id"].apply(
            decomp_step_from_decomp_step_id)
        return mvid_list
Exemplo n.º 8
0
def get_best_codcorrect_vers(decomp_step, gbd_round_id):
    """Return the largest best ``output_version_id`` for a decomp step.

    Looks in ``cod.output_version`` for rows with ``status = 1`` and
    ``is_best = 1`` whose ``decomp_step_id`` matches the resolved step and
    whose ``code_version`` equals ``gbd_round_id``, and returns the maximum
    ``output_version_id`` as an ``int``.
    """
    best_version_sql = """
            SELECT MAX(CAST(output_version_id AS UNSIGNED)) as id
            FROM cod.output_version
            WHERE status = 1 AND is_best = 1
            AND decomp_step_id = :decomp_step_id
            AND code_version = :gbd_round_id
        """
    bind_values = {
        "decomp_step_id": decomp_step_id_from_decomp_step(
            decomp_step, gbd_round_id),
        "gbd_round_id": gbd_round_id,
    }

    best = query(best_version_sql, parameters=bind_values, conn_def='cod')
    max_version_id = best.at[0, 'id']
    return int(max_version_id)
Exemplo n.º 9
0
def _validate_in_sync(
        bundle_id: int,
        gbd_round_id: int,
        decomp_step: str,
        crosswalk_version_id: Optional[int],
        session: orm.Session
) -> None:
    """
    Thin wrapper for elmo's validate_in_sync.
    We only want to call the function when the crosswalk version exists.

    Args:
        bundle_id: bundle to check.
        gbd_round_id: GBD round of the current run.
        decomp_step: decomp step name of the current run.
        crosswalk_version_id: crosswalk version to check; when falsy the
            function returns immediately without calling elmo.
        session: database session forwarded to elmo.

    Raises:
        ValueError: if elmo's check raises ValueError. The original message
            is kept and extended with guidance about which best model is
            required; the original exception is attached as the cause.
    """
    if not crosswalk_version_id:
        return

    try:
        elmo_validate.validate_in_sync(
            bundle_id=bundle_id,
            gbd_round_id=gbd_round_id,
            decomp_step=decomp_step,
            session=session,
            crosswalk_version_id=crosswalk_version_id
        )
    except ValueError as e:
        # No previous best model found
        if decomp_step != ds.TWO:
            # Name the preceding decomp step in the guidance message.
            previous_step = decomp_step_from_decomp_step_id(
                helpers.get_previous_decomp_step_id(
                    decomp_step_id_from_decomp_step(
                        decomp_step, gbd_round_id)))
            previous_step_str = (
                ' AND the previous decomp step (GBD round '
                f'ID {gbd_round_id}, {previous_step})')
        else:
            previous_step_str = ''
        # Chain explicitly so the traceback shows elmo's error as the
        # direct cause of this one.
        raise ValueError(
            str(e) +
            '\n\nYou must have a best model with an '
            f'associated crosswalk version from the previous round '
            f'(GBD round ID {gbd_round_id - 1}){previous_step_str}. '
            'If this is a custom bundle, don\'t pass in a crosswalk version.'
        ) from e
Exemplo n.º 10
0
def best_versions(cause_id, gbd_round_id, sex_id, decomp_step):
    """Return the single best cod model_version_id for a cause and sex.

    Selects from ``cod.model_version`` the rows marked best with
    ``model_version_type_id`` 3 or 4 for the given cause, sex, GBD round
    and decomp step.

    Returns:
        The one matching ``model_version_id``.

    Raises:
        RuntimeError: if the query returns no model version, or more than
            one.
    """
    decomp_step_id = decomp_step_id_from_decomp_step(decomp_step, gbd_round_id)
    # NOTE(review): values are interpolated straight into the SQL text;
    # consider bound parameters if any of them can come from outside.
    q = """
        SELECT model_version_id 
        FROM cod.model_version
        WHERE cause_id={cid} AND 
              sex_id = {sex} AND 
              gbd_round_id={gbd} AND
              is_best = 1 AND
              model_version_type_id IN (3, 4) AND
              decomp_step_id = {decomp}
        """.format(cid=cause_id,
                   sex=sex_id,
                   gbd=gbd_round_id,
                   decomp=decomp_step_id)
    res = query(q, conn_def='cod')
    mvids = res.model_version_id.tolist()
    if len(mvids) == 1:
        return mvids[0]
    if not mvids:
        # Previously reported as "more than one", which was misleading.
        raise RuntimeError(
            "Error: No best model_version_id found for cause_id: {cid}. "
            "Cannot split model.".format(cid=cause_id))
    raise RuntimeError(
        "Error: Returned more than one model_version_id: "
        "{mvids} for cause_id: {cid}. Cannot split model.".format(
            mvids=mvids, cid=cause_id))
Exemplo n.º 11
0
def best_version(input_machine, gbd_round_id, decomp_step):
    """Look up the most recent version id for an input machine.

    ``input_machine`` must be 'como', 'codcorrect' or 'fauxcorrect'; it
    selects the GBD process id and metadata type id used in the query.
    The query takes the most recently inserted GBD process version with
    ``gbd_process_version_status_id = 1`` for the given round and decomp
    step and returns its metadata value as an ``int``.

    Raises:
        ValueError: if ``input_machine`` is not one of the accepted names.
    """
    # (input_machine, key into c.gbd_process, key into c.gbd_metadata_type)
    machine_keys = (
        ('como', 'EPI', 'COMO'),
        ('codcorrect', 'COD', 'CODCORRECT'),
        ('fauxcorrect', 'FAUXCORRECT', 'FAUXCORRECT'),
    )
    for machine, process_key, metadata_key in machine_keys:
        if input_machine == machine:
            gbd_process_id = c.gbd_process[process_key]
            metadata_type_id = c.gbd_metadata_type[metadata_key]
            break
    else:
        raise ValueError("app_common.best_version accepts 'como', "
                         "'codcorrect' or 'fauxcorrect' as "
                         "input_machine. Got {}".format(input_machine))

    template = """
            SELECT
            pvm.val as version_id from gbd.gbd_process_version gpv
            JOIN
            gbd.gbd_process_version_metadata pvm using (gbd_process_version_id)
            WHERE
            gbd_process_id =  {process_id}
            and metadata_type_id = {metadata_type_id}
            and gbd_round_id = {gbd_round_id}
            and decomp_step_id = {decomp_step_id}
            and gbd_process_version_status_id = 1
            order by gpv.date_inserted desc
            limit 1
            """
    sql = template.format(
        process_id=gbd_process_id,
        metadata_type_id=metadata_type_id,
        gbd_round_id=gbd_round_id,
        decomp_step_id=decomp_step_id_from_decomp_step(
            decomp_step, gbd_round_id))
    latest = ezfuncs.query(sql, conn_def='gbd')
    return int(latest.squeeze())
Exemplo n.º 12
0
def generate_maps(decomp_step: str, gbd_round_id: int) -> None:
    """Generate the como, severity-split and super-squeeze configs.

    Args:
        decomp_step: decomp step name; resolved to its decomp step id for
            the como config.
        gbd_round_id: GBD round used for the como and severity-split
            configs.
    """
    decomp_step_id = decomp_step_id_from_decomp_step(
        decomp_step, gbd_round_id)
    generate_como_config(decomp_step_id, gbd_round_id)
    generate_severity_split_config(gbd_round_id)
    generate_super_squeeze_config()