Example #1
def compute_percent(self):
    mem_source = DrawSource({
        "draw_dict": self._io_mock,
        "name": "cause"
    }, mem_read_func)
    denom = mem_source.content(filters={"metric_id": metrics.RATE})
    self._compute_percent(denom_df=denom)
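
The "draw_dict"/mem_read_func pattern above reads draws from a shared in-memory store rather than from disk. Below is a minimal round-trip sketch of that pattern; the import paths are an assumption based on how these examples use the draw_sources package, and the DataFrame contents are made up:

import pandas as pd
# Import paths are an assumption, not confirmed by these examples.
from draw_sources.draw_sources import DrawSink, DrawSource
from draw_sources.io import mem_read_func, mem_write_func

io_mock = {}  # shared in-memory store; each "name" keys one logical table

# Write a toy draw DataFrame into the store...
sink = DrawSink({"draw_dict": io_mock, "name": "cause"}, mem_write_func)
sink.push(pd.DataFrame({
    "metric_id": [1, 3],
    "draw_0": [10.0, 0.2],
}))

# ...then read it back with a filter, as compute_percent does above.
source = DrawSource({"draw_dict": io_mock, "name": "cause"}, mem_read_func)
rates = source.content(filters={"metric_id": 3})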
Example #2
def read_shocks_draw_files(parent_dir, location_id):
    """Reads in rescaled draw files."""
    params = {
        'draw_dir': os.path.join(parent_dir, 'aggregated/shocks'),
        'file_pattern': '1_{location_id}_{year_id}.h5'
    }
    source = DrawSource(params)
    return source.content(filters={'location_id': location_id})
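
In the file-backed form shown here, file_pattern is a template: its {location_id} and {year_id} fields are matched against files on disk, and content(filters=...) restricts which of those files and rows are read. A hedged usage sketch, with a made-up directory and ID:

# Illustrative only: the directory and location_id are placeholders.
draws = read_shocks_draw_files('/path/to/run', location_id=101)

# Equivalent direct construction, mirroring the params above.
source = DrawSource({
    'draw_dir': '/path/to/run/aggregated/shocks',
    'file_pattern': '1_{location_id}_{year_id}.h5',
})
draws = source.content(filters={'location_id': 101})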
Example #3
def estimate_single_component(self, component):
    # data to summarize
    draw_source = DrawSource({
        "draw_dict": self.io_mock,
        "name": component
    }, mem_read_func)
    df = draw_source.content()
    df = compute_estimates(df, point_estimate="mean")
    df.rename(columns={"mean": "val"}, inplace=True)
    return df
Example #4
def read_aggregated_rescaled(parent_dir, location_id, diag_years):
    """ Read in location aggregates of rescaled draws for deaths only"""
    rescaled_params = {
        'draw_dir': os.path.join(parent_dir, 'draws'),
        'file_pattern': '{measure_id}_{location_id}.h5'
    }
    ds = DrawSource(rescaled_params)
    rescaled_draws = ds.content(filters={
        'location_id': location_id,
        'year_id': diag_years,
        'measure_id': 1
    })
    return rescaled_draws
Example #5
    def _load_data_frame(self):
        """If 'turn_off_null_check' is true then the null check will be skipped.
        Yuk. GBD 2015 como files have nulls caused by "other maternal" issues
        for males.  Generally it is much safer to validate data, this is
        dangerous but historically necessary.

        This will pass-through NoDrawsError exception raised by the underlying
        SuperGopher implementation if it cannot find any files.

        Will raise ValueError if no files exist. ValueError is used to be
        consistent with other DataSource methods
        """

        logger.debug('Super gopher _load_data_frame, kwargs:')
        for key, value in self.kwargs.items():
            value = list(np.atleast_1d(value))
            self.kwargs[key] = value
            logger.debug("    {} == {}".format(key, value))
        self.kwargs.update({'strict_filter_checking': True})

        try:
            pattern = self.file_naming_conventions['file_pattern']
            draw_dir = self.dir_path
            h5_tablename = self.file_naming_conventions.get(
                'h5_tablename', None)
            params = {'file_pattern': pattern, 'draw_dir': draw_dir}
            if h5_tablename:
                params.update({'h5_tablename': h5_tablename})

            if not self.ds:
                self.ds = DrawSource(params=params)
            df = self.ds.content(filters=self.kwargs)
            df = self._add_n_draws(df)

        except ex.InvalidFilter:
            logger.info(
                "Super gopher '{}' found no files with file_pattern: {}"
                ", draw_dir: {}, and filters {}. Stopping pipeline"
                "".format(self.name, pattern, draw_dir, self.kwargs))
            raise

        logger.info('Super gopher "{}" got content, shape {}'.format(
            self.name, df.shape))

        logger.debug(
            ('SuperGopher "{}" got and validated data, dir={}, filter='
             '{}'.format(self.name, self.dir_path,
                         self.file_naming_conventions)))
        return df
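
The loop at the top of _load_data_frame normalizes every filter value to a list before handing self.kwargs to DrawSource.content. A small standalone illustration of that normalization:

import numpy as np

kwargs = {'location_id': 101, 'year_id': [2015, 2016]}
for key, value in kwargs.items():
    # np.atleast_1d wraps scalars in a one-element array and leaves
    # sequences alone, so every filter value ends up as a list.
    kwargs[key] = list(np.atleast_1d(value))

assert kwargs == {'location_id': [101], 'year_id': [2015, 2016]}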
Example #6
def read_aggregated_rescaled(parent_dir, location_id, diag_years):
    """ Read in location aggregates of rescaled draws for deaths only"""
    rescaled_params = {
        'draw_dir': os.path.join(parent_dir, FilePaths.DRAWS_DIR),
        'file_pattern': FilePaths.DRAWS_FILE_PATTERN
    }
    ds = DrawSource(rescaled_params)
    rescaled_draws = ds.content(
        filters={
            'location_id': location_id,
            'year_id': diag_years,
            'measure_id': Measures.Ids.DEATHS
        })
    return rescaled_draws
Example #7
def read_gbd_draw_files(parent_dir, location_id, years, measure_id):
    """Pull in all data to be summarized for gbd, by location and measure, and
    filtering by years."""
    logger = logging.getLogger('summary.read_gbd_draw_files')
    try:
        params = {
            'draw_dir': os.path.join(parent_dir, 'draws'),
            'file_pattern': '{measure_id}_{location_id}.h5'
        }
        ds = DrawSource(params)
        return ds.content(filters={'measure_id': measure_id,
                                   'location_id': location_id,
                                   'year_id': years})
    except Exception as e:
        logger.exception('Failed to read location: {}'.format(e))
        raise
Example #8
def _read_unscaled_draws(
        parent_dir: str,
        location_id: int,
        sex_id: int
) -> pd.DataFrame:
    """Read unscaled draws for given location, sex, and year"""
    draw_dir = join(
        parent_dir,
        constants.FilePaths.UNAGGREGATED_DIR,
        constants.FilePaths.UNSCALED_DIR,
        constants.FilePaths.DEATHS_DIR
    )
    file_pattern = constants.FilePaths.UNSCALED_DRAWS_FILE_PATTERN

    try:
        draws = DrawSource({
            'draw_dir': draw_dir,
            'file_pattern': file_pattern,
            'h5_tablename': constants.Keys.DRAWS,
            'num_workers': constants.DAG.Tasks.Cores.APPLY_CORRECTION
        }).content(filters={
            constants.Columns.LOCATION_ID: location_id,
            constants.Columns.SEX_ID: sex_id
        })
    except InvalidSpec:
        raise FileNotFoundError(
            f"Draw files were not found for location: {location_id} and sex: "
            f"{sex_id}."
        )
    return draws
Example #9
def _read_scaled_draws(
        tool_name: str,
        parent_dir: str,
        location_id: int,
        year_id: int,
        measure_id: int
) -> pd.DataFrame:
    if tool_name == GBD.Process.Name.FAUXCORRECT:
        draw_dir = os.path.join(
            parent_dir,
            FilePaths.DRAWS_SCALED_DIR,
            str(measure_id)
        )
        file_pattern = FilePaths.SUMMARY_INPUT_FILE_PATTERN.format(
            location_id=location_id, year_id=year_id)
    else:
        draw_dir = os.path.join(
            parent_dir,
            FilePaths.DRAWS_DIR)
        file_pattern = FilePaths.SUMMARY_AGGREGATE_READ_PATTERN.format(
            measure_id=measure_id, location_id=location_id, year_id=year_id)
    return DrawSource({
        'draw_dir': draw_dir,
        'file_pattern': file_pattern,
        'h5_tablename': Keys.DRAWS
    }).content()
Example #10
def _get_draw_source_and_filters(
        aggregation_type: str,
        source_dir: str,
        year_id: int,
        measure_id: int
) -> Tuple[DrawSource, Dict[str, int]]:
    if aggregation_type == LocationAggregation.Type.UNAGGREGATED_SHOCKS:
        source = DrawSource(
            params={
                'draw_dir': source_dir,
                'file_pattern': FilePaths.UNAGGREGATED_SHOCKS_FILE_PATTERN
            })
        draw_filters = {
            Columns.MEASURE_ID: measure_id,
            Columns.YEAR_ID: year_id
        }
    else:
        source = DrawSource({
            'draw_dir': source_dir,
            'file_pattern':
                FilePaths.LOCATION_AGGREGATE_FILE_PATTERN.format(
                    year_id=year_id)
        })
        draw_filters = {}
    return source, draw_filters
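
The source is returned together with the filters it should be read with, so a caller would typically feed one straight into the other. A sketch with illustrative argument values:

source, draw_filters = _get_draw_source_and_filters(
    aggregation_type=LocationAggregation.Type.UNAGGREGATED_SHOCKS,
    source_dir='/path/to/source',  # placeholder path
    year_id=2019,
    measure_id=1,
)
df = source.content(filters=draw_filters)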
Example #11
def read_cod_draw_files(pool, parent_dir, location_id, years):
    """Pull in all data to be summarized for CoD, by location and filtering by
    years."""
    logger = logging.getLogger('summary.read_cod_draw_files')
    try:
        agg_rescaled_params = {
            'draw_dir': os.path.join(parent_dir, 'aggregated/rescaled'),
            'file_pattern': '{measure_id}_{location_id}_{year_id}.h5'
        }
        ds = DrawSource(agg_rescaled_params)
        rescaled_draws = ds.content(filters={'location_id': location_id,
                                             'year_id': years,
                                             'measure_id': 1})

        daly_draw_params = {
            'draw_dir': os.path.join(parent_dir, 'draws'),
            'file_pattern': '{measure_id}_{location_id}.h5',
        }
        ds = DrawSource(daly_draw_params)
        dalynator_draws = ds.content(filters={'location_id': location_id,
                                              'year_id': years,
                                              'measure_id': 1})
        return rescaled_draws, dalynator_draws
    except Exception as e:
        logger.exception('Failed to read location: {}'.format(e))
        raise
Example #12
    def _import_static_draws_by_component(self, component):
        dim = self.dimensions.get_dimension_by_component(
            component, self.measure_id)
        draw_dir = os.path.join(self.como_version.como_dir, "draws", component,
                                str(dim.index_dim.get_level("location_id")[0]))
        real_source = DrawSource({
            "draw_dir": draw_dir,
            "file_pattern": "{measure_id}_{year_id}_{sex_id}.h5"
        })
        real_source.add_transform(add_metric)
        fake_sink = DrawSink({
            "draw_dict": self.io_mock,
            "name": component
        }, mem_write_func)

        # get the filters and data
        sim_dim = self.dimensions.get_simulation_dimensions(self.measure_id)
        filters = sim_dim.index_dim.levels.copy()
        filters["age_group_id"] = dim.index_dim.get_level("age_group_id")
        df = real_source.content(filters=filters)
        fake_sink.push(df)
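
add_transform registers a function that is applied to each DataFrame as it passes through the source or sink (Examples #13 and #28 use the same hook on a DrawSink, with extra keyword arguments). A plausible shape for the add_metric transform used above (purely an assumption, since its body is not shown) is a function that takes and returns a DataFrame:

def add_metric(df):
    # Hypothetical sketch of the transform registered above; the real
    # add_metric may differ. metric_id 3 is the GBD id for rates.
    df['metric_id'] = 3
    return df

# Registered transforms run as draws flow through: on content() for a
# source, on push() for a sink.
real_source.add_transform(add_metric)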
Example #13
    try:
        # Read in helper files
        logging.info("Reading in helper files.")
        config = read_helper_files(parent_dir)
        # Read in config variables
        index_cols = config['index_columns']
        draw_cols = config['data_columns']
        sex_id = config['eligible_sex_ids']

        # Create draw source/sink
        logging.info("Creating draw source and sink.")
        draw_dir = os.path.join(parent_dir, 'aggregated/{}'.format(df_type))
        input_pattern = '{measure_id}_{location_id}_{year_id}.h5'
        source_config = {'draw_dir': draw_dir, 'file_pattern': input_pattern}
        draw_source = DrawSource(source_config)

        output_pattern = '{measure_id}_{location_id}_{year_id}.h5'
        sink_config = {
            'draw_dir': draw_dir,
            'file_pattern': output_pattern,
            'h5_tablename': 'draws'
        }
        draw_sink = DrawSink(sink_config)

        # Apply regional scalar transform
        region_locs = get_location_metadata(gbd_round_id=GBD.GBD_ROUND_ID,
                                            location_set_id=35)
        region_locs = region_locs[region_locs.level == 2].location_id.tolist()
        draw_sink.add_transform(apply_regional_scalars,
                                region_locs=region_locs,
Example #14
        multi_file = os.path.join(outdir, 'multi_year_{}.csv'.format(location_id))
        multi_year.to_csv(multi_file, index=False)
        os.chmod(multi_file, 0o775)


if __name__ == '__main__':
    (sev_version_id, location_id, gbd_round_id, year_id,
     change_intervals) = parse_arguments()

    drawdir = 'FILEPATH/{}/draws/'.format(sev_version_id)
    outdir = 'FILEPATH/sev/{}/summaries/'.format(sev_version_id)

    # identify rei_ids from the csvs in the draw_dir
    files = glob(os.path.join(drawdir, '*'))
    rei_ids = [int(os.path.basename(file)) for file in files
               if 'population' not in file and 'params' not in file]

    # Instantiate draw source
    source = DrawSource(
        params={'draw_dir': drawdir,
                'file_pattern': '{rei_id}/{location_id}.csv'})

    summarize_loc(source,
                  drawdir,
                  outdir,
                  location_id,
                  year_id,
                  rei_ids,
                  change_intervals=change_intervals,
                  gbd_round_id=gbd_round_id)
Example #15
def sexual_violence_input_source(self):
    return DrawSource(self.get_params_by_component("sexual", "inputs"))
Example #16
def injuries_input_source(self):
    return DrawSource(self.get_params_by_component("injuries", "inputs"))
Example #17
def sequela_input_source(self):
    return DrawSource(self.get_params_by_component("sequela", "inputs"))
Example #18
def injuries_result_source(self):
    return DrawSource(self.get_params_by_component("injuries", "draws"))
Example #19
def impairment_result_source(self):
    return DrawSource(self.get_params_by_component("impairment", "draws"))
Example #20
def cause_result_source(self):
    return DrawSource(self.get_params_by_component("cause", "draws"))
Example #21
def sequela_result_source(self):
    return DrawSource(self.get_params_by_component("sequela", "draws"))
Example #22
    return (sev_version_id, rei_id, gbd_round_id, n_draws, location_set_id)


if __name__ == '__main__':
    (sev_version_id, rei_id, gbd_round_id, n_draws,
     location_set_id) = parse_arguments()

    drawdir = 'FILEPATH/{}/draws'.format(sev_version_id)

    # set up source and sink
    source_params = {
        'draw_dir': drawdir,
        'file_pattern': '{rei_id}/{location_id}.csv'
    }
    source = DrawSource(source_params)
    sink_params = {
        'draw_dir': drawdir,
        'file_pattern': '{rei_id}/{location_id}.csv'
    }
    sink = DrawSink(sink_params,
                    write_func=partial(standard_write_func, index=False))

    index_cols = [
        'rei_id', 'year_id', 'age_group_id', 'sex_id', 'measure_id',
        'metric_id'
    ]
    draw_cols = ['draw_{}'.format(i) for i in range(n_draws)]

    for lsid in location_set_id:
        popfile = os.path.join(drawdir, 'population_{}.csv'.format(lsid))
Example #23
def append_shocks(
        parent_dir: str,
        machine_process: str,
        measure_ids: List[int],
        location_id: int,
        most_detailed_location: bool,
        sex_id: int
) -> None:
    """
    Add yll and death shocks (location-aggregated) to re/scaled ylls and
    re/scaled deaths (also location-aggregated).
    Draws are stored broken down by location and sex for parallel execution.

    Arguments:
        parent_dir (str):
        machine_process (str):
        measure_ids (list): the measure_ids included in this run
        location_id (int): draws location_id
        most_detailed_location (bool):
        sex_id (int): draws sex_id

    """
    scaled_dir, shocks_dir = _get_input_filepaths(
        parent_dir,
        machine_process,
        most_detailed_location
    )
    input_file_pattern = FilePaths.APPEND_SHOCKS_FILE_PATTERN.format(
        sex_id=sex_id, location_id=location_id)
    # Deaths
    if Measures.Ids.DEATHS in measure_ids:
        scaled_params = {
            'draw_dir': os.path.join(scaled_dir, FilePaths.DEATHS_DIR),
            'file_pattern': input_file_pattern
        }
        scaled_ds = DrawSource(scaled_params)
        scaled = scaled_ds.content(
            filters={
                Columns.LOCATION_ID: location_id,
                Columns.SEX_ID: sex_id,
                Columns.MEASURE_ID: Measures.Ids.DEATHS
            }
        )
        shock_params = {
            'draw_dir': os.path.join(shocks_dir, FilePaths.DEATHS_DIR),
            'file_pattern': input_file_pattern
        }
        shock_ds = DrawSource(shock_params)
        shocks = shock_ds.content(
            filters={
                Columns.LOCATION_ID: location_id,
                Columns.SEX_ID: sex_id,
                Columns.MEASURE_ID: Measures.Ids.DEATHS
            }
        )
        new_scaled = _append_shocks(scaled, shocks)
    else:
        new_scaled = None

    # YLLS
    if Measures.Ids.YLLS in measure_ids:
        scaled_yll_params = {
            'draw_dir': os.path.join(scaled_dir, FilePaths.YLLS_DIR),
            'file_pattern': input_file_pattern
        }
        scaled_yll_ds = DrawSource(scaled_yll_params)
        scaled_ylls = scaled_yll_ds.content(
            filters={
                Columns.LOCATION_ID: location_id,
                Columns.SEX_ID: sex_id,
                Columns.MEASURE_ID: Measures.Ids.YLLS
            }
        )
        shock_yll_params = {
            'draw_dir': os.path.join(shocks_dir, FilePaths.YLLS_DIR),
            'file_pattern': input_file_pattern
        }
        shock_yll_ds = DrawSource(shock_yll_params)
        shock_ylls = shock_yll_ds.content(
            filters={
                Columns.LOCATION_ID: location_id,
                Columns.SEX_ID: sex_id,
                Columns.MEASURE_ID: Measures.Ids.YLLS
            }
        )
        new_scaled_ylls = _append_shocks(scaled_ylls, shock_ylls)
    else:
        new_scaled_ylls = None

    save_map = {
        GBD.Process.Name.CODCORRECT: _save_all_codcorrect_outputs,
        GBD.Process.Name.FAUXCORRECT: _save_all_fauxcorrect_outputs
    }
    save_map[machine_process](
        parent_dir,
        new_scaled,
        new_scaled_ylls,
        location_id,
        sex_id
    )
Example #24
def gen_draw_source(self):
    source = DrawSource(self._mem_io_params, mem_read_func)
    return source
Example #25
    def new_population(self, location_set_id, agg_loc_sets=()):
        dim = self.nonfatal_dimensions.get_simulation_dimensions(
            self.measure_id)
        df = get_population(
            age_group_id=(
                dim.index_dim.get_level("age_group_id") + [164]),
            location_id=dbtrees.loctree(location_set_id=location_set_id,
                                        gbd_round_id=self.gbd_round_id
                                        ).node_ids,
            sex_id=dim.index_dim.get_level("sex_id"),
            year_id=dim.index_dim.get_level("year_id"))
        index_cols = ["location_id", "year_id", "age_group_id", "sex_id"]
        data_cols = ["population"]

        io_mock = {}
        source = DrawSource({"draw_dict": io_mock, "name": "tmp"},
                            mem_read_func)
        sink = DrawSink({"draw_dict": io_mock, "name": "tmp"}, mem_write_func)
        sink.push(df[index_cols + data_cols])

        # location
        for set_id in agg_loc_sets:
            loc_tree = dbtrees.loctree(
                location_set_id=set_id,
                gbd_round_id=self.gbd_round_id)
            operator = Sum(
                index_cols=[col for col in index_cols if col != "location_id"],
                value_cols=data_cols)
            aggregator = AggSynchronous(
                draw_source=source,
                draw_sink=sink,
                index_cols=[col for col in index_cols if col != "location_id"],
                aggregate_col="location_id",
                operator=operator)
            aggregator.run(loc_tree)

        # age
        for age_group_id in ComoSummaries._gbd_compare_age_group_list:
            age_tree = dbtrees.agetree(age_group_id)
            operator = Sum(
                index_cols=[col for col in index_cols
                            if col != "age_group_id"],
                value_cols=data_cols)
            aggregator = AggSynchronous(
                draw_source=source,
                draw_sink=sink,
                index_cols=[col for col in index_cols
                            if col != "age_group_id"],
                aggregate_col="age_group_id",
                operator=operator)
            aggregator.run(age_tree)

        # sex
        sex_tree = dbtrees.sextree()
        operator = Sum(
            index_cols=[col for col in index_cols if col != "sex_id"],
            value_cols=data_cols)
        aggregator = AggSynchronous(
            draw_source=source,
            draw_sink=sink,
            index_cols=[col for col in index_cols if col != "sex_id"],
            aggregate_col="sex_id",
            operator=operator)
        aggregator.run(sex_tree)
        df = source.content()
        df.to_hdf(
            "{}/info/population.h5".format(self.como_dir),
            'draws',
            mode='w',
            format='table',
            data_columns=["location_id", "year_id", "age_group_id", "sex_id"])
Example #26
        years = config['eligible_year_ids']
        sexes = config['eligible_sex_ids']

        # Read in all inputs
        logging.info("Reading in all inputs for {}".format(location))
        rescaled_dir = os.path.join(parent_dir, 'aggregated/rescaled')

        shock_dir = os.path.join(parent_dir, 'aggregated/shocks')
        input_file_pattern = '{measure_id}_{location_id}_{year_id}.h5'

        logging.info("Rescaled draws...")
        rescaled_params = {
            'draw_dir': rescaled_dir,
            'file_pattern': input_file_pattern
        }
        rescaled_ds = DrawSource(rescaled_params)
        rescaled = rescaled_ds.content(filters={
            'location_id': location,
            'measure_id': 1
        })

        logging.info("Shock draws...")
        shock_params = {
            'draw_dir': shock_dir,
            'file_pattern': input_file_pattern
        }
        shock_ds = DrawSource(shock_params)
        shocks = shock_ds.content(filters={
            'location_id': location,
            'measure_id': 1
        })
Example #27
def _get_population(
    version: MachineParameters,
    location_set_id: int = constants.LocationSetId.OUTPUTS,
    agg_loc_sets: Optional[List[int]] = (
        constants.LocationAggregation.Ids.SPECIAL_LOCATIONS +
        [constants.LocationSetId.OUTPUTS])
) -> pd.DataFrame:
    """
    Unpacks arguments from version object to use with get_population
    function. Requests most detailed ages and most detailed sexes because
    age-sex population aggregates are created in the summarize module.
    Dependant on demographics team to upload population for majority of
    aggregate locations but currently uses AggSynchronous to create population
    information for select Norway locations in LocationSetId.OUTPUTS.

    Arguments:
        version (MachineParameters): object containing all the demographic
            and configuration data needed to query population
            estimates.
        location_set_id (int): The id for hierarchy to aggregate up
        agg_loc_sets (list): Additional location sets to create special
                aggregates

    Return:
        pd.DataFrame
    """
    pop = get_population(age_group_id=version.most_detailed_age_group_ids,
                         location_id=version.location_ids,
                         year_id=version.year_ids,
                         sex_id=version.sex_ids,
                         run_id=version.population_version_id,
                         decomp_step=version.decomp_step,
                         gbd_round_id=version.gbd_round_id)
    io_mock = {}
    source = DrawSource({"draw_dict": io_mock, "name": "tmp"}, mem_read_func)
    sink = DrawSink({"draw_dict": io_mock, "name": "tmp"}, mem_write_func)
    index_cols = constants.Columns.DEMOGRAPHIC_INDEX
    data_cols = [constants.Columns.POPULATION]
    sink.push(pop[index_cols + data_cols])
    # location
    if agg_loc_sets:
        assert len(agg_loc_sets) == len(set(agg_loc_sets))
        assert agg_loc_sets[-1] == constants.LocationSetId.OUTPUTS

        for set_id in agg_loc_sets:
            loc_tree = dbtrees.loctree(location_set_id=set_id,
                                       gbd_round_id=version.gbd_round_id)
            operator = Sum(
                index_cols=[col for col in index_cols
                            if col != constants.Columns.LOCATION_ID],
                value_cols=data_cols)
            aggregator = AggSynchronous(
                draw_source=source,
                draw_sink=sink,
                index_cols=[col for col in index_cols
                            if col != constants.Columns.LOCATION_ID],
                aggregate_col=constants.Columns.LOCATION_ID,
                operator=operator)
            aggregator.run(loc_tree)
        special_locations = source.content()
    else:
        special_locations = pd.DataFrame()

    return pd.concat(
        [pop,
         special_locations.loc[
             ~special_locations.location_id.isin(pop.location_id.unique())]],
        ignore_index=True)
Example #28
def location_aggregate_birth_counts(gbd_round_id: int, decomp_step: str,
                                    constants_path: pathlib.PosixPath,
                                    location_set_id: int) -> List[str]:
    """For the given gbd_round, decomp_step, and location_set_id, build a
    complete set of location-aggregated live births and return the output
    filenames.
    """

    logger.info(f'aggregating for location_set_id {location_set_id}')
    multiple_tree_flag = (location_set_id
                          in mmr_constants.MULTIPLE_ROOT_LOCATION_SET_IDS)

    scalars = get_regional_scalars(gbd_round_id, decomp_step)
    index_cols = ['location_id', 'year_id', 'age_group_id', 'sex_id']

    cov_estimate_filename = (
        mmr_constants.COV_ESTIMATES_FORMAT_FILENAME.format(location_set_id))

    region_locs, most_detailed_locs = get_location_level_sets(
        gbd_round_id=gbd_round_id,
        decomp_step=decomp_step,
        location_set_id=location_set_id)

    save_birth_count_estimates(gbd_round_id=gbd_round_id,
                               decomp_step=decomp_step,
                               cov_estimate_filepath=constants_path /
                               cov_estimate_filename,
                               location_set_id=location_set_id,
                               most_detailed_locs=most_detailed_locs)

    loc_trees = dbtrees.loctree(location_set_id=location_set_id,
                                gbd_round_id=gbd_round_id,
                                decomp_step=decomp_step,
                                return_many=multiple_tree_flag)
    if not multiple_tree_flag:
        loc_trees = [loc_trees]

    draw_source = DrawSource(params={
        'draw_dir': str(constants_path),
        'file_pattern': cov_estimate_filename
    })

    output_filenames = []
    for i, loc_tree in enumerate(loc_trees, start=1):
        output_filename = f'{location_set_id}_{i}.h5'
        draw_sink = DrawSink(params={
            'draw_dir': str(constants_path),
            'file_pattern': output_filename
        })
        draw_sink.add_transform(
            _apply_regional_scalars,
            regional_scalars_df=scalars.query('location_id in @region_locs'),
            gbd_round_id=gbd_round_id,
            decomp_step=decomp_step)

        op = Sum(index_cols=[s for s in index_cols if s != 'location_id'],
                 value_cols=[mmr_constants.Columns.LIVE_BIRTH_VALUE_COL])

        AggSynchronous(
            draw_source=draw_source,
            draw_sink=draw_sink,
            index_cols=[s for s in index_cols if s != 'location_id'],
            aggregate_col='location_id',
            operator=op).run(loc_tree, include_leaves=True)

        output_filenames.append(output_filename)

    return output_filenames
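
Examples #25, #27, and #28 all share one aggregation skeleton: a Sum operator over the non-aggregated index columns, driven by AggSynchronous from a DrawSource into a DrawSink along a hierarchy tree. Stripped to its core it looks like the sketch below; the import paths are an assumption based on IHME's aggregator and hierarchies packages, and source/sink stand for any of the sources and sinks built above:

# Import paths are an assumption, not confirmed by these examples.
from aggregator.aggregators import AggSynchronous
from aggregator.operators import Sum
from hierarchies import dbtrees

index_cols = ['location_id', 'year_id', 'age_group_id', 'sex_id']
value_cols = ['population']
non_agg_cols = [col for col in index_cols if col != 'location_id']

# location_set_id / gbd_round_id values are illustrative.
loc_tree = dbtrees.loctree(location_set_id=35, gbd_round_id=6)

AggSynchronous(
    draw_source=source,   # any DrawSource, file-backed or in-memory
    draw_sink=sink,       # aggregated rows are pushed back via the sink
    index_cols=non_agg_cols,
    aggregate_col='location_id',
    operator=Sum(index_cols=non_agg_cols, value_cols=value_cols),
).run(loc_tree)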