def main(nid, extract_type_id, code_system_id, launch_set_id):
    """Run the disaggregation pipeline for one (nid, extract_type_id) source.

    Reads the 'formatted' phase data for the source, runs the pipeline with
    the configured run/version ids, and writes the 'disaggregation' phase
    output.

    Args:
        nid: source NID identifying the dataset.
        extract_type_id: extraction type for the source.
        code_system_id: code system the source is coded in.
        launch_set_id: id of the launch set this run belongs to.
    """
    # Pull all run/version ids from configuration.
    cause_set_version_id = CONF.get_id('cause_set_version')
    location_set_version_id = CONF.get_id('location_set_version')
    pop_run_id = CONF.get_id('pop_run')
    env_run_id = CONF.get_id('env_run')
    distribution_set_version_id = CONF.get_id('distribution_set_version')

    # download data from input database
    df = get_claude_data('formatted',
                         nid=nid,
                         extract_type_id=extract_type_id,
                         location_set_version_id=location_set_version_id)

    # BUG FIX: adjacent literals previously joined as "...is inthe input
    # database?" — a space was missing between the fragments.
    assert len(df) != 0, ("Dataframe is empty."
                          " Are you sure this source is in"
                          " the input database?")
    # run the pipeline
    df = run_pipeline(nid,
                      extract_type_id,
                      launch_set_id,
                      df,
                      code_system_id,
                      cause_set_version_id,
                      location_set_version_id,
                      pop_run_id,
                      env_run_id,
                      distribution_set_version_id,
                      diagnostic=False)
    # upload to database
    write_phase_output(df, 'disaggregation', nid, extract_type_id,
                       launch_set_id)
# Example #2 (original scrape marker: 示例#2)
# 0
def main(nid, extract_type_id, launch_set_id):
    """Read the data, run the phase, write the output."""
    print_log_message("Reading {} data".format(PHASE_ANTECEDENT))
    df = get_claude_data(PHASE_ANTECEDENT,
                         nid=nid,
                         extract_type_id=extract_type_id)

    # Fetch every required run/version id from configuration, coerced to int.
    config_ids = {
        key: int(CONF.get_id(key))
        for key in ('env_run', 'pop_run',
                    'location_set_version', 'cause_set_version')
    }

    df = df.rename(columns={'cf': 'cf_final'})

    df = run_phase(df, nid, extract_type_id,
                   config_ids['env_run'],
                   config_ids['pop_run'],
                   config_ids['location_set_version'],
                   config_ids['cause_set_version'])

    print_log_message(
        "Writing {n} rows of output for launch set {ls}, nid {nid}, extract "
        "{e}".format(n=len(df), ls=launch_set_id, nid=nid, e=extract_type_id))
    write_phase_output(df, PHASE_NAME, nid, extract_type_id, launch_set_id)
def main(nid, extract_type_id, code_system_id, launch_set_id):
    """Run the disaggregation pipeline for one (nid, extract_type_id) source.

    Like the sibling entry points, but selects a Norway-specific age/sex
    distribution set version when the source's iso3 is 'NOR'.

    Args:
        nid: source NID identifying the dataset.
        extract_type_id: extraction type for the source.
        code_system_id: code system the source is coded in.
        launch_set_id: id of the launch set this run belongs to.
    """
    cause_set_version_id = CONF.get_id('cause_set_version')
    location_set_version_id = CONF.get_id('location_set_version')
    pop_run_id = CONF.get_id('pop_run')
    env_run_id = CONF.get_id('env_run')
    # BUG FIX: code_map_version_id was passed to run_pipeline below but never
    # defined in this function, guaranteeing a NameError. Sourcing it from
    # configuration like the other ids; presumably the key is
    # 'code_map_version' — TODO confirm against the config schema.
    code_map_version_id = CONF.get_id('code_map_version')

    # need to use special age/sex distribution for Norway based on National data
    if get_value_from_nid(nid, 'iso3',
                          extract_type_id=extract_type_id) == 'NOR':
        distribution_set_version_id = CONF.get_id(
            'NOR_distribution_set_version')
    else:
        distribution_set_version_id = CONF.get_id('distribution_set_version')

    # download data from input database
    df = get_claude_data('formatted',
                         nid=nid,
                         extract_type_id=extract_type_id,
                         location_set_version_id=location_set_version_id)

    # BUG FIX: adjacent literals previously joined as "...is inthe input
    # database?" — a space was missing between the fragments.
    assert len(df) != 0, ("Dataframe is empty."
                          " Are you sure this source is in"
                          " the input database?")
    # run the pipeline
    df = run_pipeline(nid,
                      extract_type_id,
                      launch_set_id,
                      df,
                      code_system_id,
                      code_map_version_id,
                      cause_set_version_id,
                      location_set_version_id,
                      pop_run_id,
                      env_run_id,
                      distribution_set_version_id,
                      diagnostic=False)
    # upload to database
    write_phase_output(df, 'disaggregation', nid, extract_type_id,
                       launch_set_id)
# Example #4 (original scrape marker: 示例#4)
# 0
    def get_redistribution_envelope(self, agg_df, agg_cause_ids):
        """Build the garbage-redistribution envelope for aggregate causes.

        For each cause id in ``agg_cause_ids``, sums the deaths coded to
        garbage (cause_id 743) in the 'disaggregation' phase data whose
        redistribution packages target that cause (or any related cause),
        made square against the demographic groups present in ``agg_df``.
        Package 1 deaths are folded in separately via
        ``self.get_package_1_deaths()`` except for a hard-coded list of
        excluded (nid, extract_type_id) pairs.

        Args:
            agg_df: DataFrame whose demographic group columns
                (location/year/age/sex/site ids) define the square frame.
            agg_cause_ids: iterable of aggregate cause ids to build
                envelopes for.

        Returns:
            DataFrame with the group columns plus 'cause_id' and
            'garbage_targeting_cause'; an empty DataFrame when the raw
            data contains no garbage (cause_id 743).
        """
        print_log_message("Getting cause-package map")
        cause_package_map = get_cause_package_map(
            self.code_system_id,
            remove_decimal=self.remove_decimal,
            cause_map=self.cause_map,
            package_map=self.package_map)

        print_log_message("Getting claude data - disaggregation")
        raw_df = get_claude_data("disaggregation",
                                 nid=self.nid,
                                 extract_type_id=self.extract_type_id)

        # if there is no garbage in the raw data, then set to 0
        # (743 is the garbage cause id — short-circuit with an empty frame)
        if 743 not in raw_df.cause_id.unique():
            return pd.DataFrame()

        print_log_message("Getting package-targets relation")
        package_targets = get_package_targets(
            self.code_system_id,
            recurse_garbage_targets=False,
            remove_decimal=self.remove_decimal,
            force_rerun=False,
            block_rerun=True)
        package_targets = self.remove_package_1(package_targets)
        # Keep only package-id mappings (code_id -> package_id).
        id_to_package_map = \
            cause_package_map.query('map_type == "package_id"')[
                ['code_id', 'map_id']
            ]
        # NOTE(review): inplace rename on a query/column-selected frame may
        # trigger pandas SettingWithCopyWarning — behavior is unchanged here.
        id_to_package_map.rename(columns={'map_id': 'package_id'},
                                 inplace=True)

        print_log_message("Mapping raw data to package ids")
        raw_df = raw_df.merge(id_to_package_map, on='code_id', how='left')
        # Only garbage rows (cause_id 743) are required to map to a package.
        report_if_merge_fail(raw_df.query('cause_id == 743'), 'package_id',
                             'code_id')

        dfs = []
        print_log_message("Looping over {} causes".format(len(agg_cause_ids)))
        group_cols = [
            'location_id', 'year_id', 'age_group_id', 'sex_id', 'site_id'
        ]
        # Unique demographic groups to make each per-cause frame square.
        square_df = agg_df[group_cols].drop_duplicates()
        # Sources for which Package 1 deaths are skipped entirely —
        # reason for exclusion is not visible here; TODO confirm.
        bad_nid_extract_pairs = [(69913, 1), (69918, 1), (69922, 1),
                                 (93739, 1)]
        nid_extract_pair = (self.nid, self.extract_type_id)
        if nid_extract_pair not in bad_nid_extract_pairs:
            p1_df = self.get_package_1_deaths()

        for cause_id in agg_cause_ids:
            # All causes related to this aggregate (per the cause hierarchy),
            # then every package that targets any of them.
            cause_ids = get_all_related_causes(cause_id, self.cause_hierarchy)
            package_ids = list(
                package_targets[package_targets['cause_id'].isin(
                    cause_ids)]['package_id'].unique())

            # Garbage deaths in those packages, squared over the groups
            # (missing groups filled with 0 deaths).
            df = raw_df[raw_df['package_id'].isin(package_ids)].copy()
            df = df.groupby(group_cols, as_index=False)['deaths'].sum()
            df = square_df.merge(df, how='left')
            df['deaths'] = df['deaths'].fillna(0)
            df = df.rename(columns={'deaths': 'garbage_targeting_cause'})
            df['cause_id'] = cause_id
            dfs.append(df)
            if nid_extract_pair not in bad_nid_extract_pairs:
                # Do the same for Package 1 deaths
                # (note: grouped without 'site_id', unlike the main branch)
                p1_cause_df = p1_df[p1_df.cause_id.isin(cause_ids)]
                p1_cause_df = p1_cause_df.groupby(
                    ['location_id', 'year_id', 'age_group_id', 'sex_id'],
                    as_index=False).freq.sum()
                p1_cause_df = square_df.merge(p1_cause_df, how='left')
                p1_cause_df['freq'] = p1_cause_df['freq'].fillna(0)
                p1_cause_df = p1_cause_df.rename(
                    columns={'freq': 'garbage_targeting_cause'})
                p1_cause_df['cause_id'] = cause_id
                dfs.append(p1_cause_df)

        print_log_message("Concatenating")
        # Sum the raw-garbage and Package-1 contributions per group/cause.
        df = pd.concat(dfs, ignore_index=True)
        df = df.groupby(group_cols + ['cause_id'], as_index=False).sum()

        return df