def run_task(self, location_set_version_id, component):
    source = self.get_source(component)
    sink = self.get_sink(component)
    dimensions = self.dimensions.get_dimension_by_component(
        component, self.measure_id)

    # get the tree we are aggregating
    loc_trees = dbtrees.loctree(
        location_set_version_id=location_set_version_id,
        return_many=True)
    for loc_tree in loc_trees:
        # get the weight vals
        pop = get_population(
            self.como_version,
            age_group_id=dimensions.index_dim.get_level("age_group_id"),
            location_id=[node.id for node in loc_tree.nodes],
            year_id=dimensions.index_dim.get_level("year_id"),
            sex_id=dimensions.index_dim.get_level("sex_id"))
        pop = pop[[
            "age_group_id", "location_id", "year_id", "sex_id", "population"
        ]]

        # set up our aggregation operator
        operator = WtdSum(
            index_cols=[col for col in dimensions.index_names
                        if col != "location_id"],
            value_cols=dimensions.data_list(),
            weight_df=pop,
            weight_name="population",
            merge_cols=["location_id", "year_id", "age_group_id", "sex_id"])

        # set up our aggregator
        aggregator = AggMemEff(
            draw_source=source,
            draw_sink=sink,
            index_cols=[col for col in dimensions.index_names
                        if col != "location_id"],
            aggregate_col="location_id",
            operator=operator,
            chunksize=self.chunksize[component])

        # run the tree
        aggregator.run(
            loc_tree,
            draw_filters={
                "measure_id": [self.measure_id],
                "year_id": dimensions.index_dim.get_level("year_id"),
                "sex_id": dimensions.index_dim.get_level("sex_id")
            },
            n_processes=self.chunksize[component])
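# For intuition, here is a minimal, self-contained pandas sketch of the
# population-weighted aggregation that the WtdSum operator performs
# conceptually: child-location draws are combined using population as the
# weight and emitted for the parent node. The exact normalization (weighted
# sum vs. weighted average) is defined inside the aggregator library; this
# sketch shows the weighted-average variant, and every name below is
# illustrative rather than part of the aggregator API.
import pandas as pd

def weighted_aggregate(draws, pop, value_cols, index_cols, parent_id):
    """Aggregate child-location draws to a single parent location."""
    merged = draws.merge(
        pop, on=["location_id", "year_id", "age_group_id", "sex_id"])
    # weight each draw column by population
    for col in value_cols:
        merged[col] = merged[col] * merged["population"]
    out = merged.groupby(index_cols, as_index=False)[
        value_cols + ["population"]].sum()
    # divide back out to obtain a population-weighted average
    for col in value_cols:
        out[col] = out[col] / out["population"]
    out["location_id"] = parent_id
    return out.drop(columns="population")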
def get_dataframe(self):
    start_time = time.time()
    logger.info("START aggregate locations, time = {}".format(start_time))
    AggMemEff(self.draw_source, self.draw_sink, self.index_cols,
              'location_id', self.operator, chunksize=2
              ).run(self.loctree, include_leaves=False, n_processes=8,
                    draw_filters=self.draw_filters)
    end_time = time.time()
    logger.info("location aggregation complete, time = {}"
                .format(end_time))
    elapsed = end_time - start_time
    logger.info("DONE location agg pipeline at {}, "
                "elapsed seconds = {}".format(end_time, elapsed))
    logger.info("{}".format(SUCCESS_LOG_MESSAGE))
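# The START/DONE log bracketing above is a recurring pattern in these
# pipelines; a hedged sketch of factoring it into a context manager
# (the logger name and messages are illustrative, not taken from the
# production code):
import logging
import time
from contextlib import contextmanager

logger = logging.getLogger(__name__)

@contextmanager
def log_duration(step):
    start = time.time()
    logger.info("START %s, time = %s", step, start)
    try:
        yield
    finally:
        end = time.time()
        logger.info("DONE %s at %s, elapsed seconds = %s",
                    step, end, end - start)

# usage: with log_duration("aggregate locations"): aggregator.run(...)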
def aggregate_locations(
        aggregation_type: str,
        parent_dir: str,
        measure_id: int,
        gbd_round_id: int,
        location_set_id: int,
        year_id: int
) -> None:
    """
    Uses an AggMemEff aggregator to aggregate locations for deaths and YLLs.

    Arguments:
        aggregation_type (str): the type of data to be aggregated up a
            location hierarchy. One of 'aggregated/rescaled',
            'aggregated/shocks', 'aggregated/unscaled', 'scaled', or
            'unaggregated/shocks'.
        parent_dir (str): parent fauxcorrect directory, e.g. PATH/{version}
        measure_id (int): measure ID for deaths or YLLs
        gbd_round_id (int): GBD round ID for this fauxcorrect run
        location_set_id (int): location set ID with which to aggregate
        year_id (int): draws year ID

    Raises:
        ValueError: if measure_id is not deaths (1) or YLLs (4)
    """
    # Set up DrawSource and DrawSink.
    source_dir, sink_dir = _get_draw_source_sink_dirs(
        parent_dir, aggregation_type, measure_id)
    source, draw_filters = _get_draw_source_and_filters(
        aggregation_type, source_dir, year_id, measure_id)
    sink = DrawSink({
        'draw_dir': sink_dir,
        'file_pattern': FilePaths.LOCATION_AGGREGATE_FILE_PATTERN.format(
            year_id=year_id),
        'h5_tablename': Keys.DRAWS
    })
    sink.add_transform(
        _apply_regional_scalars, parent_dir, gbd_round_id, location_set_id)
    sink.add_transform(add_measure_id_to_sink, measure_id=measure_id)

    # Clean up old files we plan on writing.
    clean_aggregation_directory(
        root_dir=sink.params['draw_dir'],
        file_pattern=sink.params['file_pattern'],
        location_set_id=location_set_id,
        gbd_round_id=gbd_round_id)

    # Set up aggregator and location tree.
    index_cols = [col for col in Columns.INDEX if col != Columns.LOCATION_ID]
    operator = Sum(index_cols, Columns.DRAWS)
    agg = AggMemEff(
        source, sink, index_cols, Columns.LOCATION_ID, operator, chunksize=2)
    is_sdi_set = location_set_id == LocationSetId.SDI
    trees = loctree(
        location_set_id=location_set_id,
        gbd_round_id=gbd_round_id,
        return_many=is_sdi_set)

    logging.info(f"Aggregating locations, location_set_id: {location_set_id}")
    for tree in np.atleast_1d(trees):
        agg.run(tree, draw_filters=draw_filters, n_processes=10)
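# The sink transforms registered above are plain callables that receive
# each chunk of draws as a DataFrame (plus the extra arguments bound in
# add_transform) and return the modified DataFrame. A hedged sketch of the
# shape such a transform takes; this is an illustrative stand-in, not the
# actual fauxcorrect helper:
import pandas as pd

def add_measure_id(df: pd.DataFrame, measure_id: int) -> pd.DataFrame:
    # attach measure_id so downstream readers can filter on it
    df['measure_id'] = measure_id
    return df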
]
draw_cols = ['draw_{}'.format(i) for i in range(n_draws)]

for lsid in location_set_id:
    popfile = os.path.join(drawdir, 'population_{}.csv'.format(lsid))
    population = pd.read_csv(popfile)

    # aggregation operator
    operator = WtdSum(
        index_cols=index_cols,
        value_cols=draw_cols,
        weight_df=population,
        weight_name='population',
        merge_cols=['location_id', 'year_id', 'age_group_id', 'sex_id'])

    # run aggregation
    aggregator = AggMemEff(
        draw_source=source,
        draw_sink=sink,
        index_cols=index_cols,
        aggregate_col='location_id',
        operator=operator)
    if lsid == 40:
        loc_trees = loctree(location_set_id=lsid,
                            gbd_round_id=gbd_round_id,
                            return_many=True)
        for tree in loc_trees:
            aggregator.run(tree, draw_filters={'rei_id': rei_id})
    else:
        loc_tree = loctree(location_set_id=lsid,
                           gbd_round_id=gbd_round_id)
        aggregator.run(loc_tree, draw_filters={'rei_id': rei_id})
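# The if/else above exists because the SDI location set (id 40) is stored
# as several disjoint trees, so loctree must be called with
# return_many=True. As the other call sites in this section do, the branch
# can be collapsed with numpy's atleast_1d; a hedged sketch reusing the
# loctree and aggregator names from the snippet above:
import numpy as np

def run_for_location_set(aggregator, lsid, gbd_round_id, rei_id):
    trees = loctree(location_set_id=lsid,
                    gbd_round_id=gbd_round_id,
                    return_many=(lsid == 40))
    for tree in np.atleast_1d(trees):
        aggregator.run(tree, draw_filters={'rei_id': rei_id})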
region_locs = region_locs[region_locs.level == 2].location_id.tolist()
draw_sink.add_transform(apply_regional_scalars, region_locs=region_locs,
                        parent_dir=parent_dir)
draw_sink.add_transform(transform_add_measure, measure_id=measure_id)

# create operator
logging.info("Reading regional scalars from flatfiles.")
index_cols = [col for col in index_cols if col != 'location_id']
operator = Sum(index_cols, draw_cols)

# Aggregate
logging.info("Instantiate aggregator.aggregators.AggMemEff.")
aggregator = AggMemEff(
    draw_source=draw_source,
    draw_sink=draw_sink,
    index_cols=index_cols,
    aggregate_col='location_id',
    operator=operator,
    chunksize=2)

logging.info("Create location tree(s).")
is_sdi_set = False
if location_set_id == 40:
    is_sdi_set = True
tree = loctree(location_set_id=location_set_id,
               gbd_round_id=GBD.GBD_ROUND_ID,
               return_many=is_sdi_set)

logging.info("Run aggregator.")
for t in np.atleast_1d(tree):
    aggregator.run(t, draw_filters={
        'measure_id': measure_id,
    })
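# apply_regional_scalars is registered as a sink transform above;
# conceptually it multiplies draws for region-level locations by a
# correction factor read from flat files. A hedged, self-contained pandas
# sketch of that idea; the scalar lookup and all names here are
# illustrative, not the production implementation:
import pandas as pd

def apply_regional_scalars_sketch(df, region_locs, scalars):
    """Scale draws for region locations; `scalars` is assumed to carry
    location_id, year_id and a 'scalar' column."""
    draw_cols = [c for c in df.columns if c.startswith('draw_')]
    mask = df['location_id'].isin(region_locs)
    scaled = df[mask].merge(scalars, on=['location_id', 'year_id'],
                            how='left')
    # locations with no scalar on file are left unscaled
    scaled['scalar'] = scaled['scalar'].fillna(1.0)
    scaled[draw_cols] = scaled[draw_cols].mul(scaled['scalar'], axis=0)
    return pd.concat([df[~mask], scaled.drop(columns='scalar')],
                     ignore_index=True)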