Exemplo n.º 1
0
    def get_code_id_for_package_name(self, df, code_system_id):
        """Attach a ``code_id`` to every row based on its ``target_package``.

        For each package of the given code system, the first garbage code
        listed for that package is looked up in the cause map to obtain one
        ``code_id`` per package. The result is left-merged onto ``df`` by
        matching ``df.target_package`` against ``package_name``.

        Args:
            df: input data; must contain a ``target_package`` column.
            code_system_id: code system whose cause map and package list are
                fetched from ``engine_room``.

        Returns:
            ``df`` with the additional columns ``package_name`` and
            ``code_id`` brought in by the merge.

        Raises:
            AssertionError: if ``df`` has no ``target_package`` column.
        """
        # Raised explicitly (instead of via ``assert``) so the check is not
        # stripped when Python runs with -O; the exception type and message
        # are unchanged for callers.
        if "target_package" not in df:
            raise AssertionError(
                "Input data must have a 'target_package' column.")

        # value -> code_id lookup, with the codes passed through the same
        # cleaning routine that is applied to the garbage codes below so the
        # two sides of the merge are comparable.
        cause_map = engine_room.get_cause_map(
            code_system_id,
            **self.standard_cache_options).loc[:, ["value", "code_id"]]
        cause_map["value"] = misc.clean_icd_codes(cause_map.value, True)

        packages = engine_room.get_package_list(
            code_system_id,
            include_garbage_codes=True,
            **self.standard_cache_options)
        packages = packages.pipe(
            engine_room.remove_five_plus_digit_icd_codes,
            "garbage_code",
            code_system_id=code_system_id)
        packages = packages.assign(
            value=lambda d: misc.clean_icd_codes(d.garbage_code, True))
        packages = packages.loc[:, ["package_name", "value"]]

        # pick first garbage code found in the package
        first_code = packages.drop_duplicates("package_name", keep="first")
        pkg = first_code.merge(
            cause_map, how="left").loc[:, ["package_name", "code_id"]]

        df = df.merge(pkg,
                      left_on="target_package",
                      right_on="package_name",
                      how="left")

        # check that we found a code for each package in target_package
        # where target_package is not NaN
        misc.report_if_merge_fail(df[df.target_package.notna()], "code_id",
                                  "target_package")
        return df
Exemplo n.º 2
0
 def make_cause_fractions(self, df):
     """Convert each deaths column into a cause-fraction column.

     Every column named in ``self.deaths_cols`` is divided by ``mean_env``
     and stored as ``'cf' + <text following 'deaths' in the column name>``
     (e.g. ``deaths_raw`` -> ``cf_raw``). If ``mean_env`` is not already
     present it is obtained through ``add_envelope`` and the merge is
     checked on the demographic keys.

     The input frame is never modified: results are written to the new
     frame produced by dropping ``mean_env``.

     Args:
         df: DataFrame holding the columns listed in ``self.deaths_cols``.

     Returns:
         A new DataFrame without ``mean_env`` and with the cf columns added.
     """
     if 'mean_env' not in df.columns:
         df = add_envelope(df, env_df=self.env_df)
         report_if_merge_fail(
             df, 'mean_env',
             ['sex_id', 'age_group_id', 'year_id', 'location_id'])

     envelope = df['mean_env']
     # drop() hands back a new frame, so the assignments below cannot leak
     # cf columns onto the caller's DataFrame.
     result = df.drop('mean_env', axis=1)
     for col in self.deaths_cols:
         suffix = col.split('deaths')[1]
         result['cf' + suffix] = result[col] / envelope
     return result
Exemplo n.º 3
0
    def make_age_standardized_group(self, df, age_weight_dict):
        """Collapse age-specific cause fractions into one weighted group.

        Works on a copy of ``df``. Each column in ``self.cf_cols`` is turned
        into ``cf * mean_env / population * weight``, where ``weight`` comes
        from ``age_weight_dict`` keyed by ``age_group_id``. All rows are then
        relabelled with ``age_group_id`` 27 (presumably the age-standardized
        group id -- confirm against the age metadata) and summed over
        ``self.id_cols``.

        Args:
            df: data with ``age_group_id``, ``sample_size`` and the columns
                in ``self.cf_cols``.
            age_weight_dict: mapping of ``age_group_id`` to its weight.

        Returns:
            DataFrame with ``self.id_cols`` plus the summed ``self.cf_cols``
            and ``sample_size``.
        """
        demographic_keys = ['sex_id', 'age_group_id', 'year_id', 'location_id']

        weighted = df.copy()

        # look up the weight of every age group
        weighted['weight'] = weighted['age_group_id'].map(age_weight_dict)
        report_if_merge_fail(weighted, 'weight', 'age_group_id')

        # bring in population and envelope, verifying each merge in turn
        weighted = add_population(weighted, pop_df=self.pop_df)
        report_if_merge_fail(weighted, "population", list(demographic_keys))
        weighted = add_envelope(weighted, env_df=self.env_df)
        report_if_merge_fail(weighted, "mean_env", list(demographic_keys))

        # age standardized death rate = deaths / population * weight,
        # with deaths recovered as cause fraction * envelope
        for cf_col in self.cf_cols:
            deaths = weighted[cf_col] * weighted['mean_env']
            rate = deaths / weighted['population']
            weighted[cf_col] = rate * weighted['weight']

        weighted['age_group_id'] = 27
        helper_cols = ['weight', 'population', 'mean_env']
        weighted = weighted.drop(helper_cols, axis=1)

        summed_cols = self.cf_cols + ['sample_size']
        grouped = weighted.groupby(self.id_cols, as_index=False)
        collapsed = grouped[summed_cols].sum()

        report_duplicates(collapsed, self.id_cols)

        return collapsed
Exemplo n.º 4
0
    def read_age_restriction_targets_df(self, code_system_id):
        """Load and prepare the age-restriction target table.

        Reads the CSV at ``self.art_path`` (formatted with
        ``code_system_id``), then in order: adds ``cause_id`` for the source
        ``acause``, expands the age logic to ``age_group_id``, resolves the
        target package to a code id, fills defaults, and adds
        ``target_cause_id`` for ``target_acause``. Both cause merges are
        checked with ``misc.report_if_merge_fail``.

        Args:
            code_system_id: code system whose target file is read.

        Returns:
            DataFrame restricted to ``cause_id``, ``is_young_invalid``,
            ``override_cause_hierarchy``, ``age_group_id``,
            ``target_cause_id`` and ``target_code_id``.
        """
        targets = pd.read_csv(
            self.art_path.format(code_system_id=code_system_id))

        # add cause_id for source
        targets = targets.merge(
            self.cause_meta_df[["acause", "cause_id"]], how="left")

        # add age_group_id
        targets = self.expand_age_logic_to_age_group_id(targets)

        # add code_id, stored as the target's code id
        targets = self.get_code_id_for_package_name(targets, code_system_id)
        targets = targets.rename(columns={"code_id": "target_code_id"})

        # per the original note, the acause is NaN only when the target is
        # a garbage code; those rows are labelled "_gc" and the flag columns
        # default to 0
        defaults = {
            "target_acause": "_gc",
            "is_young_invalid": 0,
            "override_cause_hierarchy": 0,
        }
        targets = targets.fillna(defaults)

        # add cause_id for target
        target_lookup = self.cause_meta_df[["acause", "cause_id"]].rename(
            columns={
                "acause": "target_acause",
                "cause_id": "target_cause_id"
            })
        df = targets.merge(target_lookup, how="left")

        misc.report_if_merge_fail(df, "cause_id", "acause")

        misc.report_if_merge_fail(df, "target_cause_id", "target_acause")

        keep_cols = [
            "cause_id",
            "is_young_invalid",
            "override_cause_hierarchy",
            "age_group_id",
            "target_cause_id",
            "target_code_id",
        ]
        return df[keep_cols]
Exemplo n.º 5
0
    def simple_aggregate(self):
        """Aggregate location_ids to country level.

        Rows whose ``location_id`` differs from their country-level location
        are relabelled with the country id and summed over every column that
        is not in ``self.val_cols`` (``site_id`` excluded, so sites are
        collapsed). The aggregates get ``site_id`` 2 and are returned
        stacked on top of the original ``self.df`` rows.

        Returns:
            DataFrame holding the national aggregates followed by all rows
            of ``self.df``.

        Raises:
            ValueError: if ``site_id`` is not among the grouping columns.
        """
        # Function-scope import: DataFrame.append was removed in pandas 2.0,
        # so the final stacking uses pd.concat instead.
        import pandas as pd

        df = self.df.copy()
        country_location_ids = \
            get_country_level_location_id(df.location_id.unique(),
                                          self.location_meta_df)
        df = df.merge(country_location_ids, how='left', on='location_id')
        report_if_merge_fail(df, 'country_location_id', ['location_id'])

        # Keep only rows below country level and relabel them with their
        # country id; assign() on the filtered frame avoids writing into a
        # slice (SettingWithCopyWarning).
        is_subnational = df['location_id'] != df['country_location_id']
        df = df[is_subnational].assign(
            location_id=lambda d: d['country_location_id'])
        df = df.drop(['country_location_id'], axis=1)

        # want to collapse site_id for national level
        group_cols = [col for col in df.columns if col not in self.val_cols]
        group_cols.remove('site_id')
        df = df.groupby(group_cols, as_index=False)[self.val_cols].sum()

        # set site_id for national aggregates (cannot be missing)
        df['site_id'] = 2

        # append national aggregates to the incoming dataframe
        return pd.concat([df, self.df])