Example 1
    (sev_version_id, rei_id, gbd_round_id, n_draws,
     location_set_id) = parse_arguments()

    drawdir = 'FILEPATH/{}/draws'.format(sev_version_id)

    # set up source and sink
    source_params = {
        'draw_dir': drawdir,
        'file_pattern': '{rei_id}/{location_id}.csv'
    }
    source = DrawSource(source_params)
    sink_params = {
        'draw_dir': drawdir,
        'file_pattern': '{rei_id}/{location_id}.csv'
    }
    sink = DrawSink(sink_params,
                    write_func=partial(standard_write_func, index=False))

    index_cols = [
        'rei_id', 'year_id', 'age_group_id', 'sex_id', 'measure_id',
        'metric_id'
    ]
    draw_cols = ['draw_{}'.format(i) for i in range(n_draws)]

    for lsid in location_set_id:
        popfile = os.path.join(drawdir, 'population_{}.csv'.format(lsid))
        population = pd.read_csv(popfile)

        # aggregation operator
        operator = WtdSum(
            index_cols=index_cols,
            value_cols=draw_cols,
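The snippet is cut off inside the WtdSum call. Judging from the aggregator package's weighted-sum operator and the AggSynchronous pattern used in the examples below, the loop body plausibly finishes along these lines (the weight arguments and the location-tree run are a reconstruction, not the original code):

        # population-weighted sum of the draw columns (reconstructed sketch)
        operator = WtdSum(
            index_cols=index_cols,
            value_cols=draw_cols,
            weight_df=population,
            weight_name='population',
            merge_cols=['location_id', 'year_id', 'age_group_id', 'sex_id'])
        # wire source, sink, and operator together and aggregate up the
        # location hierarchy for this location set
        aggregator = AggSynchronous(
            draw_source=source,
            draw_sink=sink,
            index_cols=index_cols,
            aggregate_col='location_id',
            operator=operator)
        loc_tree = dbtrees.loctree(location_set_id=lsid,
                                   gbd_round_id=gbd_round_id)
        aggregator.run(loc_tree)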
Example 2
def sequela_result_sink(self):
    return DrawSink(self.get_params_by_component("sequela", "draws"))
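A sink built this way is used by pushing draw DataFrames through it; a minimal usage sketch (the instance and DataFrame names are illustrative, not from the original):

sink = machine.sequela_result_sink()    # 'machine' stands in for the owning object
sink.push(sequela_draws, append=False)  # sequela_draws: a DataFrame of sequela draws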
Example 3
        sex_id = config['eligible_sex_ids']

        # Create draw source/sink
        logging.info("Creating draw source and sink.")
        draw_dir = os.path.join(parent_dir, 'aggregated/{}'.format(df_type))
        input_pattern = '{measure_id}_{location_id}_{year_id}.h5'
        source_config = {'draw_dir': draw_dir, 'file_pattern': input_pattern}
        draw_source = DrawSource(source_config)

        output_pattern = '{measure_id}_{location_id}_{year_id}.h5'
        sink_config = {
            'draw_dir': draw_dir,
            'file_pattern': output_pattern,
            'h5_tablename': 'draws'
        }
        draw_sink = DrawSink(sink_config)

        # Apply regional scalar transform
        region_locs = get_location_metadata(gbd_round_id=GBD.GBD_ROUND_ID,
                                            location_set_id=35)
        region_locs = region_locs[region_locs.level == 2].location_id.tolist()
        draw_sink.add_transform(apply_regional_scalars,
                                region_locs=region_locs,
                                parent_dir=parent_dir)
        draw_sink.add_transform(transform_add_measure, measure_id=measure_id)

        # create operator
        logging.info("Reading regional scalars from flatfiles.")
        index_cols = [col for col in index_cols if col != 'location_id']
        operator = Sum(index_cols, draw_cols)
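This fragment also stops just before the aggregation is run. In this codebase the source, sink, and operator are then handed to AggSynchronous and run over a location tree, as Examples 5 and 9 show in full (a sketch; loc_tree is assumed to come from dbtrees.loctree):

        aggregator = AggSynchronous(
            draw_source=draw_source,
            draw_sink=draw_sink,
            index_cols=index_cols,
            aggregate_col='location_id',
            operator=operator)
        aggregator.run(loc_tree)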
Example 4
def save_all_draws(parent_dir,
                   index_columns,
                   rescaled_data,
                   shock_data,
                   unscaled_data,
                   measure_id=1):
    for data in [rescaled_data, shock_data, unscaled_data]:
        for i in index_columns:
            data[i] = data[i].astype(int)

    rescaled_params = {
        'draw_dir': os.path.join(parent_dir, 'aggregated/rescaled'),
        'file_pattern': '{measure_id}_{location_id}_{year_id}.h5',
        'h5_tablename': 'draws'
    }
    rescaled_sink = DrawSink(rescaled_params)
    rescaled_sink.add_transform(add_measure_id_to_sink, measure_id=measure_id)
    rescaled_sink.push(rescaled_data, append=False)

    unscaled_params = {
        'draw_dir': os.path.join(parent_dir, 'aggregated/unscaled'),
        'file_pattern': '{measure_id}_{location_id}_{year_id}.h5',
        'h5_tablename': 'draws'
    }
    unscaled_sink = DrawSink(unscaled_params)
    unscaled_sink.add_transform(add_measure_id_to_sink, measure_id=measure_id)
    unscaled_sink.push(unscaled_data, append=False)

    shocks_params = {
        'draw_dir': os.path.join(parent_dir, 'aggregated/shocks'),
        'file_pattern': '{measure_id}_{location_id}_{year_id}.h5',
        'h5_tablename': 'draws'
    }
    shocks_sink = DrawSink(shocks_params)
    shocks_sink.add_transform(add_measure_id_to_sink, measure_id=measure_id)
    shocks_sink.push(shock_data, append=False)
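add_measure_id_to_sink is not shown on this page. DrawSink transforms receive each pushed DataFrame plus the keyword arguments given to add_transform and must return the modified DataFrame, so it is presumably a small function along these lines (a sketch, not the original):

def add_measure_id_to_sink(df, measure_id):
    # attach the measure_id column before the draws are written out
    df['measure_id'] = measure_id
    return df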
Example 5
    def new_population(self, location_set_id, agg_loc_sets=()):
        dim = self.nonfatal_dimensions.get_simulation_dimensions(
            self.measure_id)
        df = get_population(
            age_group_id=(
                dim.index_dim.get_level("age_group_id") + [164]),
            location_id=dbtrees.loctree(location_set_id=location_set_id,
                                        gbd_round_id=self.gbd_round_id
                                        ).node_ids,
            sex_id=dim.index_dim.get_level("sex_id"),
            year_id=dim.index_dim.get_level("year_id"))
        index_cols = ["location_id", "year_id", "age_group_id", "sex_id"]
        data_cols = ["population"]

        io_mock = {}
        source = DrawSource({"draw_dict": io_mock, "name": "tmp"},
                            mem_read_func)
        sink = DrawSink({"draw_dict": io_mock, "name": "tmp"}, mem_write_func)
        sink.push(df[index_cols + data_cols])

        # location
        for set_id in agg_loc_sets:
            loc_tree = dbtrees.loctree(
                location_set_id=set_id,
                gbd_round_id=self.gbd_round_id)
            operator = Sum(
                index_cols=[col for col in index_cols if col != "location_id"],
                value_cols=data_cols)
            aggregator = AggSynchronous(
                draw_source=source,
                draw_sink=sink,
                index_cols=[col for col in index_cols if col != "location_id"],
                aggregate_col="location_id",
                operator=operator)
            aggregator.run(loc_tree)

        # age
        for age_group_id in ComoSummaries._gbd_compare_age_group_list:
            age_tree = dbtrees.agetree(age_group_id)
            operator = Sum(
                index_cols=[col for col in index_cols
                            if col != "age_group_id"],
                value_cols=data_cols)
            aggregator = AggSynchronous(
                draw_source=source,
                draw_sink=sink,
                index_cols=[col for col in index_cols
                            if col != "age_group_id"],
                aggregate_col="age_group_id",
                operator=operator)
            aggregator.run(age_tree)

        # sex
        sex_tree = dbtrees.sextree()
        operator = Sum(
            index_cols=[col for col in index_cols if col != "sex_id"],
            value_cols=data_cols)
        aggregator = AggSynchronous(
            draw_source=source,
            draw_sink=sink,
            index_cols=[col for col in index_cols if col != "sex_id"],
            aggregate_col="sex_id",
            operator=operator)
        aggregator.run(sex_tree)
        df = source.content()
        df.to_hdf(
            "{}/info/population.h5".format(self.como_dir),
            'draws',
            mode='w',
            format='table',
            data_columns=["location_id", "year_id", "age_group_id", "sex_id"])
Example 6
def save_all_draws(parent_dir,
                   ylls,
                   yll_shocks,
                   location_id,
                   index_columns,
                   measure_id=4):
    # Save yll data
    agg_rescaled_params = {
        'draw_dir': os.path.join(parent_dir, 'aggregated/rescaled'),
        'file_pattern': '{measure_id}_{location_id}_{year_id}.h5',
        'h5_tablename': 'draws'
    }
    rescaled_sink = DrawSink(agg_rescaled_params)
    rescaled_sink.add_transform(add_measure_id_to_sink, measure_id=measure_id)
    rescaled_sink.push(ylls, append=False)

    agg_shocks_params = {
        'draw_dir': os.path.join(parent_dir, 'aggregated/shocks'),
        'file_pattern': '{measure_id}_{location_id}_{year_id}.h5',
        'h5_tablename': 'draws'
    }
    shocks_sink = DrawSink(agg_shocks_params)
    shocks_sink.add_transform(add_measure_id_to_sink, measure_id=measure_id)
    shocks_sink.push(yll_shocks, append=False)
Example 7
def _save_all_ylls_fauxcorrect(
        ylls: pd.DataFrame,
        yll_shocks: pd.DataFrame,
        parent_dir: str,
        location_id: int,
        sex_id: int,
        measure_id: int = 4
) -> None:
    """Save YLLs for given location and sex"""
    ylls_sink = DrawSink({
        'draw_dir': join(
            parent_dir, FilePaths.DRAWS_SCALED_DIR, FilePaths.YLLS_DIR
        ),
        'file_pattern': FilePaths.YLL_DRAWS_FILE_PATTERN.format(
            sex_id=sex_id, location_id=location_id
        ),
        'h5_tablename': Keys.DRAWS
    })
    ylls_sink.add_transform(add_measure_id_to_sink, measure_id=measure_id)
    ylls_sink.push(ylls, append=False)

    shocks_sink = DrawSink({
        'draw_dir': join(
            parent_dir,
            FilePaths.UNAGGREGATED_DIR,
            FilePaths.SHOCKS_DIR,
            FilePaths.YLLS_DIR
        ),
        'file_pattern': FilePaths.YLL_DRAWS_FILE_PATTERN.format(
            sex_id=sex_id, location_id=location_id
        ),
        'h5_tablename': Keys.DRAWS
    })
    shocks_sink.add_transform(add_measure_id_to_sink, measure_id=measure_id)
    shocks_sink.push(yll_shocks, append=False)
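FilePaths and Keys are constants modules from the surrounding codebase. To read the snippet stand-alone, picture them roughly as follows (the attribute names come from the code above; every value here is hypothetical):

class FilePaths:
    DRAWS_SCALED_DIR = 'draws/scaled'                     # hypothetical
    UNAGGREGATED_DIR = 'unaggregated'                     # hypothetical
    SHOCKS_DIR = 'shocks'                                 # hypothetical
    YLLS_DIR = 'ylls'                                     # hypothetical
    YLL_DRAWS_FILE_PATTERN = '{sex_id}_{location_id}.h5'  # hypothetical

class Keys:
    DRAWS = 'draws'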
Example 8
def gen_draw_sink(self):
    sink = DrawSink(self._mem_io_params, mem_write_func)
    sink.add_transform(sort_index_columns, self.dimensions.index_names)
    return sink
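sort_index_columns is another df-in/df-out transform; given that it is registered with the dimension's index names, it presumably sorts each pushed DataFrame by those columns, roughly (a sketch, not the original):

def sort_index_columns(df, index_columns):
    return df.sort_values(index_columns).reset_index(drop=True)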
Example 9
def location_aggregate_birth_counts(gbd_round_id: int, decomp_step: str,
                                    constants_path: pathlib.Path,
                                    location_set_id: int) -> List[str]:
    """
    For a given gbd_round, decomp_step, and location_set_id, build a
    complete set of location-aggregated live births and return the list
    of output filenames written.
    """

    logger.info(f'aggregating for location_set_id {location_set_id}')
    multiple_tree_flag = (location_set_id
                          in mmr_constants.MULTIPLE_ROOT_LOCATION_SET_IDS)

    scalars = get_regional_scalars(gbd_round_id, decomp_step)
    index_cols = ['location_id', 'year_id', 'age_group_id', 'sex_id']

    cov_estimate_filename = (
        mmr_constants.COV_ESTIMATES_FORMAT_FILENAME.format(location_set_id))

    region_locs, most_detailed_locs = get_location_level_sets(
        gbd_round_id=gbd_round_id,
        decomp_step=decomp_step,
        location_set_id=location_set_id)

    save_birth_count_estimates(gbd_round_id=gbd_round_id,
                               decomp_step=decomp_step,
                               cov_estimate_filepath=constants_path /
                               cov_estimate_filename,
                               location_set_id=location_set_id,
                               most_detailed_locs=most_detailed_locs)

    loc_trees = dbtrees.loctree(location_set_id=location_set_id,
                                gbd_round_id=gbd_round_id,
                                decomp_step=decomp_step,
                                return_many=multiple_tree_flag)
    if not multiple_tree_flag:
        loc_trees = [loc_trees]

    draw_source = DrawSource(params={
        'draw_dir': str(constants_path),
        'file_pattern': cov_estimate_filename
    })

    output_filenames = []
    for i, loc_tree in enumerate(loc_trees, start=1):
        output_filename = f'{location_set_id}_{i}.h5'
        draw_sink = DrawSink(params={
            'draw_dir': str(constants_path),
            'file_pattern': output_filename
        })
        draw_sink.add_transform(
            _apply_regional_scalars,
            regional_scalars_df=scalars.query('location_id in @region_locs'),
            gbd_round_id=gbd_round_id,
            decomp_step=decomp_step)

        op = Sum(index_cols=[s for s in index_cols if s != 'location_id'],
                 value_cols=[mmr_constants.Columns.LIVE_BIRTH_VALUE_COL])

        AggSynchronous(
            draw_source=draw_source,
            draw_sink=draw_sink,
            index_cols=[s for s in index_cols if s != 'location_id'],
            aggregate_col='location_id',
            operator=op).run(loc_tree, include_leaves=True)

        output_filenames.append(output_filename)

    return output_filenames
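A minimal invocation sketch (the argument values are illustrative, and the path placeholder follows the convention used elsewhere on this page):

filenames = location_aggregate_birth_counts(
    gbd_round_id=7,
    decomp_step='iterative',
    constants_path=pathlib.Path('FILEPATH'),
    location_set_id=35)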